#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* Returns the row IJ structure of the merged (diagonal + off-diagonal) local matrix.
   The merged matrix B is composed onto A so MatRestoreRowIJ_MPIAIJ() can find it again;
   PetscObjectCompose() takes a reference, so the MatDestroy() below only drops this
   routine's reference and B stays alive until the restore. */
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Companion to MatGetRowIJ_MPIAIJ(): restores the IJ arrays of the composed local matrix
   and removes the composition, which releases the last reference to it. */
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when
   enough exist.

   Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Binds/unbinds the matrix (and its pieces) to the CPU. Only records the flag on the
   parent when a device backend is configured; always forwards to the diagonal (A) and
   off-diagonal (B) blocks when they exist. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(0);
}

/* Propagates block sizes to the diagonal block; the off-diagonal block always gets a
   column block size of 1 since its columns are a scattered subset of the global columns. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(0);
}

/* Creates an IS of the locally owned rows that contain at least one numerically nonzero
   entry (in either the diagonal or off-diagonal block). Two passes over the local rows:
   the first counts all-zero rows so the result can be sized (and an Allreduce lets every
   rank return NULL when no rank has zero rows); the second collects the kept row indices. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) { /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* row has entries but they are all numerically zero */
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) { /* no rank found a zero row: keep *keptrows == NULL to signal "all rows kept" */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i; /* store global row index */
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}

/* Sets the diagonal of Y from vector D. The fast path (operate only on the diagonal
   block) is valid only when Y is assembled and has congruent row/column layouts;
   otherwise fall back to the generic implementation. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(0);
}

/* Creates an IS of the locally owned (global) row indices whose diagonal entry is zero,
   by querying the diagonal block and shifting the local indices by the ownership start. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(0);
}

/* Computes a per-column reduction (norms, sums or means of real/imaginary parts) over
   the whole matrix. Each rank accumulates its local contributions into a global-length
   work array (off-diagonal columns mapped through garray), then an Allreduce combines
   them (MAX for the infinity norm, SUM otherwise). Postprocessing applies the square
   root (NORM_2) or division by the global row count m (the MEAN variants). */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs appear to exist only to make sure the host-side
     value arrays (a_aij->a, b_aij->a accessed directly below) are current — presumably a
     device-to-host sync for GPU backends; confirm before removing */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* |a*a| == |a|^2 also for complex scalars, so this accumulates squared magnitudes */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

/* Creates an IS (global row numbering) of the rows that have entries outside the block
   diagonal: the union of the diagonal block's off-block-diagonal rows and all rows with
   a nonzero in the off-diagonal block, sorted with duplicates removed. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* colmap maps (global col + 1) -> (local col in B + 1); the +1 shifts avoid the 0 sentinel */
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES));
#else
  /* dense array of length N+1; entry 0 means "column not present in B" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}

/* Inserts/adds one value into the diagonal (A) block of an MPIAIJ matrix.
   Captures caller-scope variables (rp1/ap1/nrow1/low1/high1/lastcol1/nonew/...) by name;
   binary-search-like narrowing followed by a linear scan, with reallocation and an
   in-row shift when a genuinely new nonzero must be inserted. (orow,ocol) are the global
   indices used only for error reporting. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal (B) block; note the
   zero-value skip here does not exclude the diagonal (row != col) since B holds no
   diagonal entries. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

/* Copies a full (global) row of values v into the existing nonzero structure of the
   locally owned row: first the off-diagonal entries left of the diagonal block, then the
   diagonal block, then the remaining off-diagonal entries. v must be ordered by global
   column and match the row's nonzero pattern exactly. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* global columns in B are sorted via garray */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}

/* MatSetValues() implementation for MPIAIJ. Locally owned rows are routed to the
   diagonal block (A, local column = global - cstart) or the off-diagonal block (B,
   column translated through colmap, possibly triggering a disassembly of B back to
   global column numbering when a new off-diagonal column appears). Rows owned by other
   ranks are stashed for communication during assembly. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column in diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a; /* NOTE(review): redundant with the assignment above; harmless */
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write positions into aj/bj */
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* A stores local column indices */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* B stores global column indices pre-assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
585 */ 586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 587 { 588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 589 Mat A = aij->A; /* diagonal part of the matrix */ 590 Mat B = aij->B; /* offdiagonal part of the matrix */ 591 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 592 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 593 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 594 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 595 PetscInt *ailen = a->ilen, *aj = a->j; 596 PetscInt *bilen = b->ilen, *bj = b->j; 597 PetscInt am = aij->A->rmap->n, j; 598 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 599 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 600 PetscScalar *aa = a->a, *ba = b->a; 601 602 PetscFunctionBegin; 603 /* Iterate over all rows of the matrix */ 604 for (j = 0; j < am; j++) { 605 dnz_row = onz_row = 0; 606 rowstart_offd = full_offd_i[j]; 607 rowstart_diag = full_diag_i[j]; 608 /* Iterate over all non-zero columns of the current row */ 609 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 610 /* If column is in the diagonal */ 611 if (mat_j[col] >= cstart && mat_j[col] < cend) { 612 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 613 aa[rowstart_diag + dnz_row] = mat_a[col]; 614 dnz_row++; 615 } else { /* off-diagonal entries */ 616 bj[rowstart_offd + onz_row] = mat_j[col]; 617 ba[rowstart_offd + onz_row] = mat_a[col]; 618 onz_row++; 619 } 620 } 621 ailen[j] = dnz_row; 622 bilen[j] = onz_row; 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 628 { 629 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 630 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 631 PetscInt cstart = 
mat->cmap->rstart, cend = mat->cmap->rend, row, col; 632 633 PetscFunctionBegin; 634 for (i = 0; i < m; i++) { 635 if (idxm[i] < 0) continue; /* negative row */ 636 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 637 if (idxm[i] >= rstart && idxm[i] < rend) { 638 row = idxm[i] - rstart; 639 for (j = 0; j < n; j++) { 640 if (idxn[j] < 0) continue; /* negative column */ 641 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 642 if (idxn[j] >= cstart && idxn[j] < cend) { 643 col = idxn[j] - cstart; 644 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 645 } else { 646 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 647 #if defined(PETSC_USE_CTABLE) 648 PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col)); 649 col--; 650 #else 651 col = aij->colmap[idxn[j]] - 1; 652 #endif 653 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 654 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 655 } 656 } 657 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 658 } 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 663 { 664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 665 PetscInt nstash, reallocs; 666 667 PetscFunctionBegin; 668 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 669 670 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 671 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 672 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 673 PetscFunctionReturn(0); 674 } 675 676 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, 
MatAssemblyType mode) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 679 PetscMPIInt n; 680 PetscInt i, j, rstart, ncols, flg; 681 PetscInt *row, *col; 682 PetscBool other_disassembled; 683 PetscScalar *val; 684 685 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 686 687 PetscFunctionBegin; 688 if (!aij->donotstash && !mat->nooffprocentries) { 689 while (1) { 690 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 691 if (!flg) break; 692 693 for (i = 0; i < n;) { 694 /* Now identify the consecutive vals belonging to the same row */ 695 for (j = i, rstart = row[j]; j < n; j++) { 696 if (row[j] != rstart) break; 697 } 698 if (j < n) ncols = j - i; 699 else ncols = n - i; 700 /* Now assemble all these values with a single function call */ 701 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 702 i = j; 703 } 704 } 705 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 706 } 707 #if defined(PETSC_HAVE_DEVICE) 708 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 709 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 710 if (mat->boundtocpu) { 711 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 712 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 713 } 714 #endif 715 PetscCall(MatAssemblyBegin(aij->A, mode)); 716 PetscCall(MatAssemblyEnd(aij->A, mode)); 717 718 /* determine if any processor has disassembled, if so we must 719 also disassemble ourself, in order that we may reassemble. 
*/ 720 /* 721 if nonzero structure of submatrix B cannot change then we know that 722 no processor disassembled thus we can skip this stuff 723 */ 724 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 725 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 726 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 727 PetscCall(MatDisAssemble_MPIAIJ(mat)); 728 } 729 } 730 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 731 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 732 #if defined(PETSC_HAVE_DEVICE) 733 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 734 #endif 735 PetscCall(MatAssemblyBegin(aij->B, mode)); 736 PetscCall(MatAssemblyEnd(aij->B, mode)); 737 738 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 739 740 aij->rowvalues = NULL; 741 742 PetscCall(VecDestroy(&aij->diag)); 743 744 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 745 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 746 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 747 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 748 } 749 #if defined(PETSC_HAVE_DEVICE) 750 mat->offloadmask = PETSC_OFFLOAD_BOTH; 751 #endif 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 756 { 757 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 758 759 PetscFunctionBegin; 760 PetscCall(MatZeroEntries(l->A)); 761 PetscCall(MatZeroEntries(l->B)); 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, 
Vec b) 766 { 767 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 768 PetscObjectState sA, sB; 769 PetscInt *lrows; 770 PetscInt r, len; 771 PetscBool cong, lch, gch; 772 773 PetscFunctionBegin; 774 /* get locally owned rows */ 775 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 776 PetscCall(MatHasCongruentLayouts(A, &cong)); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 783 PetscCall(VecGetArrayRead(x, &xx)); 784 PetscCall(VecGetArray(b, &bb)); 785 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 786 PetscCall(VecRestoreArrayRead(x, &xx)); 787 PetscCall(VecRestoreArray(b, &bb)); 788 } 789 790 sA = mat->A->nonzerostate; 791 sB = mat->B->nonzerostate; 792 793 if (diag != 0.0 && cong) { 794 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 795 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 796 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 797 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 798 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 799 PetscInt nnwA, nnwB; 800 PetscBool nnzA, nnzB; 801 802 nnwA = aijA->nonew; 803 nnwB = aijB->nonew; 804 nnzA = aijA->keepnonzeropattern; 805 nnzB = aijB->keepnonzeropattern; 806 if (!nnzA) { 807 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 808 aijA->nonew = 0; 809 } 810 if (!nnzB) { 811 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 812 aijB->nonew = 0; 813 } 814 /* Must zero here before the next loop */ 815 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 816 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 817 for 
/*
  MatZeroRowsColumns_MPIAIJ - Zeros the given (global) rows AND the matching columns of a
  parallel AIJ matrix, optionally placing `diag` on the diagonal and adjusting the right-hand
  side `b` using the known solution `x` (b_i = diag*x_i for zeroed rows, and
  b_i -= a_ij*x_j for every zeroed column j appearing in a kept row i).

  Rows are given by their GLOBAL indices; ownership is resolved with a PetscSF so each
  process ends up with the locally owned subset. Collective on A.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* local row count; also reused below as a per-row nonzero count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data; /* off-diagonal block in local (compacted) column numbering */
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not zeroed"; the reduction below overwrites owned entries */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed: any process requesting row r marks it at its owner */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers (lrows becomes the sorted list of owned local rows to zero) */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix: build a 0/1 mask over ghost columns telling which
     global columns were zeroed anywhere, by scattering a mask vector forward */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring in the ghost values of x needed to correct b for the zeroed off-process columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slot -> actual local row */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this ghost column was zeroed somewhere */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/*
  MatMult_MPIAIJ - y = A*x for a parallel AIJ matrix.

  Overlaps communication and computation: the scatter of ghost values of x is started,
  the diagonal-block product is computed, the scatter is completed, and the off-diagonal
  block contribution is added.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy); /* diagonal block overlaps with the scatter */
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(0);
}
a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 969 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 976 977 PetscFunctionBegin; 978 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 979 PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 985 VecScatter Mvctx = a->Mvctx; 986 987 PetscFunctionBegin; 988 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 989 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 990 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 992 PetscFunctionReturn(0); 993 } 994 995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 998 999 PetscFunctionBegin; 1000 /* do nondiagonal part */ 1001 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1002 /* do local part */ 1003 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1004 /* add partial results together */ 1005 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1006 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1011 { 1012 MPI_Comm comm; 1013 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij; 1014 Mat Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs; 1015 IS Me, Notme; 1016 PetscInt M, N, first, last, *notme, i; 1017 PetscBool lf; 1018 PetscMPIInt size; 1019 1020 PetscFunctionBegin; 1021 /* Easy test: symmetric diagonal block */ 1022 Bij = (Mat_MPIAIJ *)Bmat->data; 1023 Bdia = Bij->A; 1024 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1025 
PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1026 if (!*f) PetscFunctionReturn(0); 1027 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1028 PetscCallMPI(MPI_Comm_size(comm, &size)); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1032 PetscCall(MatGetSize(Amat, &M, &N)); 1033 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1034 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1035 for (i = 0; i < first; i++) notme[i] = i; 1036 for (i = last; i < M; i++) notme[i - last + first] = i; 1037 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1038 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1039 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1040 Aoff = Aoffs[0]; 1041 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1042 Boff = Boffs[0]; 1043 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1044 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1045 PetscCall(MatDestroyMatrices(1, &Boffs)); 1046 PetscCall(ISDestroy(&Me)); 1047 PetscCall(ISDestroy(&Notme)); 1048 PetscCall(PetscFree(notme)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1053 { 1054 PetscFunctionBegin; 1055 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1062 1063 PetscFunctionBegin; 1064 /* do nondiagonal part */ 1065 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1066 /* do local part */ 1067 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1068 /* add partial results together */ 1069 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1070 
PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 /* 1075 This only works correctly for square matrices where the subblock A->A is the 1076 diagonal block 1077 */ 1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1079 { 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1081 1082 PetscFunctionBegin; 1083 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1084 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1085 PetscCall(MatGetDiagonal(a->A, v)); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1092 1093 PetscFunctionBegin; 1094 PetscCall(MatScale(a->A, aa)); 1095 PetscCall(MatScale(a->B, aa)); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1100 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1101 { 1102 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1103 1104 PetscFunctionBegin; 1105 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1106 PetscCall(PetscFree(aij->Aperm1)); 1107 PetscCall(PetscFree(aij->Bperm1)); 1108 PetscCall(PetscFree(aij->Ajmap1)); 1109 PetscCall(PetscFree(aij->Bjmap1)); 1110 1111 PetscCall(PetscFree(aij->Aimap2)); 1112 PetscCall(PetscFree(aij->Bimap2)); 1113 PetscCall(PetscFree(aij->Aperm2)); 1114 PetscCall(PetscFree(aij->Bperm2)); 1115 PetscCall(PetscFree(aij->Ajmap2)); 1116 PetscCall(PetscFree(aij->Bjmap2)); 1117 1118 PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf)); 1119 PetscCall(PetscFree(aij->Cperm1)); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1124 { 1125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1126 1127 PetscFunctionBegin; 
1128 #if defined(PETSC_USE_LOG) 1129 PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 1130 #endif 1131 PetscCall(MatStashDestroy_Private(&mat->stash)); 1132 PetscCall(VecDestroy(&aij->diag)); 1133 PetscCall(MatDestroy(&aij->A)); 1134 PetscCall(MatDestroy(&aij->B)); 1135 #if defined(PETSC_USE_CTABLE) 1136 PetscCall(PetscTableDestroy(&aij->colmap)); 1137 #else 1138 PetscCall(PetscFree(aij->colmap)); 1139 #endif 1140 PetscCall(PetscFree(aij->garray)); 1141 PetscCall(VecDestroy(&aij->lvec)); 1142 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1143 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 1144 PetscCall(PetscFree(aij->ld)); 1145 1146 /* Free COO */ 1147 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1148 1149 PetscCall(PetscFree(mat->data)); 1150 1151 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1152 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 1153 1154 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1155 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 1156 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 1157 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 1158 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 1159 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 1160 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 1161 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 1162 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 1164 #if defined(PETSC_HAVE_CUDA) 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 1166 #endif 1167 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 1169 #endif 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 1171 #if defined(PETSC_HAVE_ELEMENTAL) 1172 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 1173 #endif 1174 #if defined(PETSC_HAVE_SCALAPACK) 1175 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 1176 #endif 1177 #if defined(PETSC_HAVE_HYPRE) 1178 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 1179 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 1180 #endif 1181 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 1182 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 1183 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 1184 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 1185 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 1187 #if defined(PETSC_HAVE_MKL_SPARSE) 1188 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 1189 #endif 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 1193 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1199 { 1200 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1201 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1202 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1203 const PetscInt *garray = aij->garray; 1204 const PetscScalar *aa, *ba; 1205 PetscInt header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb; 1206 PetscInt *rowlens; 1207 PetscInt *colidxs; 1208 PetscScalar *matvals; 1209 1210 PetscFunctionBegin; 1211 PetscCall(PetscViewerSetUp(viewer)); 1212 1213 M = mat->rmap->N; 1214 N = mat->cmap->N; 1215 m = mat->rmap->n; 1216 rs = mat->rmap->rstart; 1217 cs = mat->cmap->rstart; 1218 nz = A->nz + B->nz; 1219 1220 /* write matrix header */ 1221 header[0] = MAT_FILE_CLASSID; 1222 header[1] = M; 1223 header[2] = N; 1224 header[3] = nz; 1225 PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1226 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1227 1228 /* fill in and store row lengths */ 1229 PetscCall(PetscMalloc1(m, &rowlens)); 1230 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1231 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1232 PetscCall(PetscFree(rowlens)); 1233 1234 /* fill in and store column indices */ 1235 PetscCall(PetscMalloc1(nz, &colidxs)); 1236 for (cnt = 0, i = 0; i < m; i++) { 1237 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1238 if (garray[B->j[jb]] > cs) break; 1239 colidxs[cnt++] = garray[B->j[jb]]; 1240 } 1241 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1242 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1243 } 1244 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc 
error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1245 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1246 PetscCall(PetscFree(colidxs)); 1247 1248 /* fill in and store nonzero values */ 1249 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1250 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1251 PetscCall(PetscMalloc1(nz, &matvals)); 1252 for (cnt = 0, i = 0; i < m; i++) { 1253 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1254 if (garray[B->j[jb]] > cs) break; 1255 matvals[cnt++] = ba[jb]; 1256 } 1257 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1258 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1259 } 1260 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1261 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1262 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1263 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1264 PetscCall(PetscFree(matvals)); 1265 1266 /* write block size option to the viewer's .info file */ 1267 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #include <petscdraw.h> 1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1273 { 1274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1275 PetscMPIInt rank = aij->rank, size = aij->size; 1276 PetscBool isdraw, iascii, isbinary; 1277 PetscViewer sviewer; 1278 PetscViewerFormat format; 1279 1280 PetscFunctionBegin; 1281 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1282 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1283 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1284 if (iascii) { 1285 PetscCall(PetscViewerGetFormat(viewer, &format)); 1286 if (format == 
/*
  MatView_MPIAIJ - Top-level view routine: forwards ASCII/draw/binary/socket viewers
  to MatView_MPIAIJ_ASCIIorDraworSocket; any other viewer type is silently ignored.
*/
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(0);
}

/*
  MatSOR_MPIAIJ - (S)SOR relaxation for a parallel AIJ matrix.

  Only LOCAL sweeps (block Jacobi across processes) and the Eisenstat trick are
  supported; a true parallel SOR errors out. Each outer iteration scatters the current
  ghost values of xx, forms bb1 = bb - B*x_ghost, and runs the sequential SOR kernel on
  the diagonal block with that corrected right-hand side.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector for the corrected right-hand side */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless a single iteration starts from a zero initial guess;
     note `~flag & SOR_ZERO_INITIAL_GUESS` is a deliberate bitwise test (flag bit NOT set) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration has x = 0, so no ghost correction is needed */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal for the pointwise scaling below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
/*
  MatPermute_MPIAIJ - Form B = P_r * A * P_c for row/column permutations given as index
  sets rowp/colp (each entry says where the corresponding old row/column should appear).

  Strategy: three PetscSFs invert the row permutation, the column permutation, and map
  the ghost (garray) columns to their permuted global indices; the permuted nonzero
  counts are then broadcast back so the destination matrix can be preallocated exactly,
  and values are inserted row by row with MatSetValues.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never assigned in this function, so the final conditional ISDestroy is dead here — confirm against history */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count permuted diagonal/off-diagonal nonzeros per local row */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/*
  MatGetGhosts_MPIAIJ - Return the number of ghost columns and (optionally) a borrowed
  pointer to their global indices (the garray of the off-diagonal block). The caller
  must NOT free *ghosts.
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}
= mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* stash local counts from the diagonal block */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* accumulate the off-diagonal block's counts */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIAIJ - Dispatches options to the sequential sub-matrices where
   appropriate; stores stash/orientation flags locally; errors on unknown options.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* these options apply identically to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIAIJ - Returns one locally owned row, merging the diagonal block (A, local
   column numbering shifted by cstart) and the off-diagonal block (B, mapped through
   garray) into global column order.  Only one row may be "active" at a time; pair with
   MatRestoreRow_MPIAIJ().  The merge relies on A's and B's rows already being sorted by
   column.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request only the outputs the caller asked for from the sub-matrices */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) { /* imark already computed by the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ(); the work
   buffers are kept cached on the matrix for reuse.
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Computes Frobenius, 1- (max column sum), or infinity- (max row sum)
   norms by walking the raw CSR arrays of both local blocks and reducing across ranks.
   NORM_2 is not supported in parallel.  Note the NORM_1 path allocates O(global columns)
   scratch on every rank.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-rank communicator: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); /* diagonal-block columns are offset by cstart */
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); /* off-diagonal columns map through garray */
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Forms B = A^T.  The diagonal block is transposed locally and
   written straight into the result; the off-diagonal block is scattered with
   MatSetValues (columns become rows owned by other ranks).  For MAT_INITIAL_MATRIX or
   in-place transposition the result is preallocated using an SF reduction of the
   off-diagonal column counts.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* one row of A's off-diagonal block becomes one column of B */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B's and destroy the shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr).  The right-scaling of
   the off-diagonal block needs ghosted entries of rr, gathered via Mvctx.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
     */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIAIJ - Clears the factored state; only the diagonal block carries
   factorization state here.
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - True iff both local blocks agree on every rank (logical AND across
   the communicator).  The off-diagonal comparison is skipped when the diagonal blocks
   already differ.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B, using the fast blockwise path only when the nonzero
   patterns match and both matrices share the same copy implementation.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/*
   MatSetUp_MPIAIJ - Default setup: preallocate with default (heuristic) row lengths.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: merge-count the union of the two
     sorted rows, comparing columns in the global (ltog-mapped) numbering */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                  /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/*
   MatAXPY_MPIAIJ - Y = a*X + Y.  Blockwise when the patterns match; otherwise a new
   matrix with the union pattern is preallocated and Y is replaced via MatHeaderMerge.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,
&nnz_o)); 2135 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2136 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2137 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2138 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2139 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2140 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2141 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2142 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2143 PetscCall(MatHeaderMerge(Y, &B)); 2144 PetscCall(PetscFree(nnz_d)); 2145 PetscCall(PetscFree(nnz_o)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2151 2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2153 { 2154 PetscFunctionBegin; 2155 if (PetscDefined(USE_COMPLEX)) { 2156 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2157 2158 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2159 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2160 } 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2165 { 2166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2167 2168 PetscFunctionBegin; 2169 PetscCall(MatRealPart(a->A)); 2170 PetscCall(MatRealPart(a->B)); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2175 { 2176 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2177 2178 PetscFunctionBegin; 2179 PetscCall(MatImaginaryPart(a->A)); 2180 PetscCall(MatImaginaryPart(a->B)); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2185 { 2186 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2187 PetscInt i, *idxb = NULL, m = A->rmap->n; 2188 PetscScalar *va, *vv; 2189 Vec vB, vA; 2190 const PetscScalar *vb; 2191 2192 PetscFunctionBegin; 2193 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2194 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2195 2196 PetscCall(VecGetArrayWrite(vA, &va)); 
2197 if (idx) { 2198 for (i = 0; i < m; i++) { 2199 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2200 } 2201 } 2202 2203 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2204 PetscCall(PetscMalloc1(m, &idxb)); 2205 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2206 2207 PetscCall(VecGetArrayWrite(v, &vv)); 2208 PetscCall(VecGetArrayRead(vB, &vb)); 2209 for (i = 0; i < m; i++) { 2210 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2211 vv[i] = vb[i]; 2212 if (idx) idx[i] = a->garray[idxb[i]]; 2213 } else { 2214 vv[i] = va[i]; 2215 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2216 } 2217 } 2218 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2219 PetscCall(VecRestoreArrayWrite(vA, &va)); 2220 PetscCall(VecRestoreArrayRead(vB, &vb)); 2221 PetscCall(PetscFree(idxb)); 2222 PetscCall(VecDestroy(&vA)); 2223 PetscCall(VecDestroy(&vB)); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2228 { 2229 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2230 PetscInt m = A->rmap->n, n = A->cmap->n; 2231 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2232 PetscInt *cmap = mat->garray; 2233 PetscInt *diagIdx, *offdiagIdx; 2234 Vec diagV, offdiagV; 2235 PetscScalar *a, *diagA, *offdiagA; 2236 const PetscScalar *ba, *bav; 2237 PetscInt r, j, col, ncols, *bi, *bj; 2238 Mat B = mat->B; 2239 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2240 2241 PetscFunctionBegin; 2242 /* When a process holds entire A and other processes have no entry */ 2243 if (A->cmap->N == n) { 2244 PetscCall(VecGetArrayWrite(v, &diagA)); 2245 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2246 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2247 PetscCall(VecDestroy(&diagV)); 2248 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2249 PetscFunctionReturn(0); 2250 } else if (n == 0) { 2251 if (m) { 2252 PetscCall(VecGetArrayWrite(v, &a)); 2253 
for (r = 0; r < m; r++) { 2254 a[r] = 0.0; 2255 if (idx) idx[r] = -1; 2256 } 2257 PetscCall(VecRestoreArrayWrite(v, &a)); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2263 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2264 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2265 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2266 2267 /* Get offdiagIdx[] for implicit 0.0 */ 2268 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2269 ba = bav; 2270 bi = b->i; 2271 bj = b->j; 2272 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2273 for (r = 0; r < m; r++) { 2274 ncols = bi[r + 1] - bi[r]; 2275 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2276 offdiagA[r] = *ba; 2277 offdiagIdx[r] = cmap[0]; 2278 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2279 offdiagA[r] = 0.0; 2280 2281 /* Find first hole in the cmap */ 2282 for (j = 0; j < ncols; j++) { 2283 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2284 if (col > j && j < cstart) { 2285 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2286 break; 2287 } else if (col > j + n && j >= cstart) { 2288 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2289 break; 2290 } 2291 } 2292 if (j == ncols && ncols < A->cmap->N - n) { 2293 /* a hole is outside compressed Bcols */ 2294 if (ncols == 0) { 2295 if (cstart) { 2296 offdiagIdx[r] = 0; 2297 } else offdiagIdx[r] = cend; 2298 } else { /* ncols > 0 */ 2299 offdiagIdx[r] = cmap[ncols - 1] + 1; 2300 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2301 } 2302 } 2303 } 2304 2305 for (j = 0; j < ncols; j++) { 2306 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2307 offdiagA[r] = *ba; 2308 offdiagIdx[r] = cmap[*bj]; 2309 } 2310 ba++; 2311 bj++; 2312 } 2313 } 2314 2315 PetscCall(VecGetArrayWrite(v, &a)); 2316 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2317 for (r = 0; r < m; ++r) { 2318 if 
(PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2319 a[r] = diagA[r]; 2320 if (idx) idx[r] = cstart + diagIdx[r]; 2321 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2322 a[r] = diagA[r]; 2323 if (idx) { 2324 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2325 idx[r] = cstart + diagIdx[r]; 2326 } else idx[r] = offdiagIdx[r]; 2327 } 2328 } else { 2329 a[r] = offdiagA[r]; 2330 if (idx) idx[r] = offdiagIdx[r]; 2331 } 2332 } 2333 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2334 PetscCall(VecRestoreArrayWrite(v, &a)); 2335 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2336 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2337 PetscCall(VecDestroy(&diagV)); 2338 PetscCall(VecDestroy(&offdiagV)); 2339 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2346 PetscInt m = A->rmap->n, n = A->cmap->n; 2347 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2348 PetscInt *cmap = mat->garray; 2349 PetscInt *diagIdx, *offdiagIdx; 2350 Vec diagV, offdiagV; 2351 PetscScalar *a, *diagA, *offdiagA; 2352 const PetscScalar *ba, *bav; 2353 PetscInt r, j, col, ncols, *bi, *bj; 2354 Mat B = mat->B; 2355 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2356 2357 PetscFunctionBegin; 2358 /* When a process holds entire A and other processes have no entry */ 2359 if (A->cmap->N == n) { 2360 PetscCall(VecGetArrayWrite(v, &diagA)); 2361 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2362 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2363 PetscCall(VecDestroy(&diagV)); 2364 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2365 PetscFunctionReturn(0); 2366 } else if (n == 0) { 2367 if (m) { 2368 PetscCall(VecGetArrayWrite(v, &a)); 2369 for (r = 0; r < m; r++) { 2370 a[r] = PETSC_MAX_REAL; 2371 if (idx) idx[r] = -1; 2372 } 2373 PetscCall(VecRestoreArrayWrite(v, &a)); 
2374 } 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2379 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2380 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2381 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2382 2383 /* Get offdiagIdx[] for implicit 0.0 */ 2384 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2385 ba = bav; 2386 bi = b->i; 2387 bj = b->j; 2388 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2389 for (r = 0; r < m; r++) { 2390 ncols = bi[r + 1] - bi[r]; 2391 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2392 offdiagA[r] = *ba; 2393 offdiagIdx[r] = cmap[0]; 2394 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2395 offdiagA[r] = 0.0; 2396 2397 /* Find first hole in the cmap */ 2398 for (j = 0; j < ncols; j++) { 2399 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2400 if (col > j && j < cstart) { 2401 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2402 break; 2403 } else if (col > j + n && j >= cstart) { 2404 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2405 break; 2406 } 2407 } 2408 if (j == ncols && ncols < A->cmap->N - n) { 2409 /* a hole is outside compressed Bcols */ 2410 if (ncols == 0) { 2411 if (cstart) { 2412 offdiagIdx[r] = 0; 2413 } else offdiagIdx[r] = cend; 2414 } else { /* ncols > 0 */ 2415 offdiagIdx[r] = cmap[ncols - 1] + 1; 2416 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2417 } 2418 } 2419 } 2420 2421 for (j = 0; j < ncols; j++) { 2422 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2423 offdiagA[r] = *ba; 2424 offdiagIdx[r] = cmap[*bj]; 2425 } 2426 ba++; 2427 bj++; 2428 } 2429 } 2430 2431 PetscCall(VecGetArrayWrite(v, &a)); 2432 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2433 for (r = 0; r < m; ++r) { 2434 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2435 a[r] = diagA[r]; 2436 if (idx) idx[r] = cstart + diagIdx[r]; 2437 
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  /* release all work storage acquired above */
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/*
  MatGetRowMax_MPIAIJ - for each locally owned row, computes the maximum entry
  (implicit zeros of the off-diagonal block B count as 0.0) and optionally the
  global column index where the maximum occurs (ties between the diagonal and
  off-diagonal blocks are resolved to the smaller global column index).

  A   - the MPIAIJ matrix
  v   - output vector of row maxima (same row layout as A)
  idx - optional array of length A->rmap->n receiving the global column indices
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat    = (Mat_MPIAIJ *)A->data;
  PetscInt           m      = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap   = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* whole row lives in the diagonal block; delegate directly, writing straight into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) { /* this process owns rows but no columns: rows are "empty" here */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* row-wise maxima of the diagonal block, with the local column index of each maximum */
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros in this row */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0 of this row */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) { /* skip over the owned diagonal column range [cstart,cend) */
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* row has no stored off-diagonal entries at all */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0: first hole follows the last stored column */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* must not land inside the diagonal block */
        }
      }
    }

    /* scan stored entries of this row, keeping the larger of (current offdiag value, stored value) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine diagonal-block and off-diagonal-block maxima into the output vector */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  /* release all work storage acquired above */
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* Gathers the nonzero structure of the whole parallel matrix into one sequential matrix (values not copied) */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  /* MatCreateSubMatrix_MPIAIJ_All() returns an array holding one matrix; unwrap and free the array */
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(0);
}

/* The point-block diagonal lives entirely in the local diagonal part a->A, so delegate to it */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype; /* propagate any numerical failure to the parallel matrix */
  PetscFunctionReturn(0);
}

/* Fills the matrix with random values; requires the matrix to be assembled or at least preallocated */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* not yet assembled: B still has full column width, so skip the diagonal-block column range */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
/* Implementation behind MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the increase-overlap method on the ops table */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* i[nrows] of a SeqAIJ CSR is its stored-nonzero count; sum the diagonal (A) and off-diagonal (B) blocks */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective on A

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch through the composed method so non-MPIAIJ types silently ignore the request */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}

/* Processes -mat_increase_overlap_scalable from the options database */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; /* reflect the current setting as the default */
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I; ensures a minimal diagonal preallocation exists before delegating to MatShift_Basic() */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* preserve the new-nonzero policy that preallocation resets */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

/* Reports whether any diagonal entry is absent; *d (if requested) is returned in global numbering */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert the local row index reported by the diagonal block to global */
  }
  PetscFunctionReturn(0);
}

/* Variable-size block diagonal also lives entirely in the local diagonal part a->A */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ: entries are positional (see MatOps); the index comments mark every fifth slot */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL};

/* ----------------------------------------------------------------------------------------*/

/* Stashes a copy of the current values of both local blocks so they can be restored later */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restores the values previously stashed by MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
  Preallocates the diagonal (A) and off-diagonal (B) local blocks.
  d_nz/d_nnz - nonzeros per row (uniform / per-row) for the diagonal block
  o_nz/o_nnz - nonzeros per row (uniform / per-row) for the off-diagonal block
  Any existing column map, gather array and scatter context are discarded since they
  become stale once the nonzero pattern changes.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* uniprocess case needs no off-diagonal block, so give it zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Resets both local blocks to their preallocated (empty) state, discarding stale maps and scatters */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicates an MPIAIJ matrix (layout, flags, column map, gather array, local blocks, scatter) */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-call MatGetRow() scratch is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 keeps the allocation non-NULL when len == 0 */
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if
  (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  /* copy any composed methods (e.g. MatConvert_xxx) onto the duplicate */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Dispatches MatLoad() to the binary or HDF5 reader depending on the viewer type */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Reads an MPIAIJ matrix from a PETSc binary viewer: header, per-row lengths, column indices, values */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix-sum the lengths into CSR row offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* cross-check the global nonzero count against the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1; /* locally matches this rank's full column range */
  }

  /* all ranks must match their full column range for the optimization to apply */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of
  iscol_local (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local columns of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameter:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local sizes of iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i]; /* mark selected columns with their global index */
    cmaparray[is_idx[i] - cstart] = i + isstart;            /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart;     /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* global -> local row index */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* only columns selected by iscol were marked above */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; M takes ownership of Asub and Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* keep only the iscol_o entries whose global column survived in the condensed Bsub */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level submatrix extraction: picks the fastest specialized path based on how isrow/iscol align with mat's layout */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* reuse: the composed objects on *newmat record which path created it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] =
PETSC_TRUE; 3376 } else { 3377 PetscCall(ISGetMinMax(iscol, &i, &j)); 3378 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3379 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3380 } 3381 3382 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3383 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3384 sameRowDist = tsameDist[0]; 3385 } 3386 3387 if (sameRowDist) { 3388 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3389 /* isrow and iscol have same processor distribution as mat */ 3390 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3391 PetscFunctionReturn(0); 3392 } else { /* sameRowDist */ 3393 /* isrow has same processor distribution as mat */ 3394 if (call == MAT_INITIAL_MATRIX) { 3395 PetscBool sorted; 3396 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3397 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3398 PetscCall(ISGetSize(iscol, &i)); 3399 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3400 3401 PetscCall(ISSorted(iscol_local, &sorted)); 3402 if (sorted) { 3403 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3404 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3405 PetscFunctionReturn(0); 3406 } 3407 } else { /* call == MAT_REUSE_MATRIX */ 3408 IS iscol_sub; 3409 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3410 if (iscol_sub) { 3411 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3412 PetscFunctionReturn(0); 3413 } 3414 } 3415 } 3416 } 3417 3418 /* General case: iscol -> iscol_local which has global size of iscol */ 3419 if (call == MAT_REUSE_MATRIX) { 3420 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", 
(PetscObject *)&iscol_local)); 3421 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3422 } else { 3423 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3424 } 3425 3426 PetscCall(ISGetLocalSize(iscol, &csize)); 3427 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3428 3429 if (call == MAT_INITIAL_MATRIX) { 3430 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3431 PetscCall(ISDestroy(&iscol_local)); 3432 } 3433 PetscFunctionReturn(0); 3434 } 3435 3436 /*@C 3437 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3438 and "off-diagonal" part of the matrix in CSR format. 3439 3440 Collective 3441 3442 Input Parameters: 3443 + comm - MPI communicator 3444 . A - "diagonal" portion of matrix 3445 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3446 - garray - global index of B columns 3447 3448 Output Parameter: 3449 . mat - the matrix, with input A as its local diagonal matrix 3450 Level: advanced 3451 3452 Notes: 3453 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3454 3455 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local "diagonal" widths over all ranks */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Map B's compact local column indices to global column indices via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand array ownership from B to Bnew so destroying B does not free the shared arrays */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

/*
  Extract a parallel submatrix when isrow matches mat's row ownership. iscol_local must be
  sorted (may contain duplicates); pass NULL for iscol_local on MAT_REUSE_MATRIX.
*/
PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the ISes and sequential submatrix stashed by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: merge against sorted garray (both sequences sorted) */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m      - number of local rows
      Ncols  - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate local to global column indices */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all
processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/*
  Implementation of MatMPIAIJSetPreallocationCSR(): preallocates from the local CSR arrays
  (Ii, J), inserts the optional values v, assembles, and records the per-row count of
  entries left of the diagonal block in Aij->ld.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* validate the CSR input: nonnegative row lengths, column indices in [0, N) (rows assumed sorted) */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block vs off-diagonal-block entries */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    JJ  = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz); /* NOTE(review): nnz_max appears unused after this loop — confirm it can be removed */
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE; /* all entries above are local by construction */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering,
i.e. for the following matrix, the input data expected is
  as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatches to the type-specific implementation, e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ() */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc., where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
  row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatches to the type-specific implementation registered as "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices
- a - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4160 4161 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4162 4163 The format which is used for the sparse matrix input, is equivalent to a 4164 row-major ordering.. i.e for the following matrix, the input data expected is 4165 as shown 4166 4167 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4168 4169 $ 1 0 0 4170 $ 2 0 3 P0 4171 $ ------- 4172 $ 4 5 6 P1 4173 $ 4174 $ Process0 [P0]: rows_owned=[0,1] 4175 $ i = {0,1,3} [size = nrow+1 = 2+1] 4176 $ j = {0,0,2} [size = 3] 4177 $ v = {1,2,3} [size = 3] 4178 $ 4179 $ Process1 [P1]: rows_owned=[2] 4180 $ i = {0,3} [size = nrow+1 = 1+1] 4181 $ j = {0,1,2} [size = 3] 4182 $ v = {4,5,6} [size = 3] 4183 4184 .seealso: `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4185 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4186 @*/ 4187 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4188 { 4189 PetscFunctionBegin; 4190 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4191 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4192 PetscCall(MatCreate(comm, mat)); 4193 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4194 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4195 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4196 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4197 PetscFunctionReturn(0); 4198 } 4199 4200 /*@ 4201 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4202 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()` 4203 4204 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4205 4206 Collective 4207 4208 Input Parameters: 4209 + mat - the matrix 4210 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4211 . n - This value should be the same as the local size used in creating the 4212 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4213 calculated if N is given) For square matrices n is almost always m. 4214 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4215 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4216 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4217 . J - column indices 4218 - v - matrix values 4219 4220 Level: intermediate 4221 4222 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4223 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4224 @*/ 4225 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4226 { 4227 PetscInt nnz, i; 4228 PetscBool nooffprocentries; 4229 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4230 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4231 PetscScalar *ad, *ao; 4232 PetscInt ldi, Iii, md; 4233 const PetscInt *Adi = Ad->i; 4234 PetscInt *ld = Aij->ld; 4235 4236 PetscFunctionBegin; 4237 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4238 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4239 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4240 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4241 4242 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4243 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4244 4245 for (i = 0; i < m; i++) { 4246 nnz = Ii[i + 1] - Ii[i]; 4247 Iii = Ii[i]; 4248 ldi = ld[i]; 4249 md = Adi[i + 1] - Adi[i]; 4250 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4251 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4252 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4253 ad += md; 4254 ao += nnz - md; 4255 } 4256 nooffprocentries = mat->nooffprocentries; 4257 mat->nooffprocentries = PETSC_TRUE; 4258 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4259 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4260 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4261 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4262 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4263 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4264 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4265 mat->nooffprocentries = nooffprocentries; 4266 PetscFunctionReturn(0); 4267 } 4268 4269 /*@ 4270 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4271 4272 Collective 4273 4274 Input Parameters: 4275 + mat - the matrix 4276 - v - matrix values, stored by row 4277 4278 Level: intermediate 4279 4280 Note: 4281 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4282 4283 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4284 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4285 @*/ 4286 
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4287 { 4288 PetscInt nnz, i, m; 4289 PetscBool nooffprocentries; 4290 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4291 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4292 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4293 PetscScalar *ad, *ao; 4294 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4295 PetscInt ldi, Iii, md; 4296 PetscInt *ld = Aij->ld; 4297 4298 PetscFunctionBegin; 4299 m = mat->rmap->n; 4300 4301 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4302 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4303 Iii = 0; 4304 for (i = 0; i < m; i++) { 4305 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4306 ldi = ld[i]; 4307 md = Adi[i + 1] - Adi[i]; 4308 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4309 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4310 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4311 ad += md; 4312 ao += nnz - md; 4313 Iii += nnz; 4314 } 4315 nooffprocentries = mat->nooffprocentries; 4316 mat->nooffprocentries = PETSC_TRUE; 4317 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4318 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4319 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4320 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4321 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4322 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4323 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4324 mat->nooffprocentries = nooffprocentries; 4325 PetscFunctionReturn(0); 4326 } 4327 4328 /*@C 4329 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4330 (the default parallel PETSc format). For good matrix assembly performance 4331 the user should preallocate the matrix storage by setting the parameters 4332 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4333 performance can be increased by more than a factor of 50. 
4334 4335 Collective 4336 4337 Input Parameters: 4338 + comm - MPI communicator 4339 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4340 This value should be the same as the local size used in creating the 4341 y vector for the matrix-vector product y = Ax. 4342 . n - This value should be the same as the local size used in creating the 4343 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4344 calculated if N is given) For square matrices n is almost always m. 4345 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4346 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4347 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4348 (same value is used for all local rows) 4349 . d_nnz - array containing the number of nonzeros in the various rows of the 4350 DIAGONAL portion of the local submatrix (possibly different for each row) 4351 or NULL, if d_nz is used to specify the nonzero structure. 4352 The size of this array is equal to the number of local rows, i.e 'm'. 4353 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4354 submatrix (same value is used for all local rows). 4355 - o_nnz - array containing the number of nonzeros in the various rows of the 4356 OFF-DIAGONAL portion of the local submatrix (possibly different for 4357 each row) or NULL, if o_nz is used to specify the nonzero 4358 structure. The size of this array is equal to the number 4359 of local rows, i.e 'm'. 4360 4361 Output Parameter: 4362 . A - the matrix 4363 4364 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4365 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4366 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4367 4368 Notes: 4369 If the *_nnz parameter is given then the *_nz parameter is ignored 4370 4371 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4372 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4373 storage requirements for this matrix. 4374 4375 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4376 processor than it must be used on all processors that share the object for 4377 that argument. 4378 4379 The user MUST specify either the local or global matrix dimensions 4380 (possibly both). 4381 4382 The parallel matrix is partitioned across processors such that the 4383 first m0 rows belong to process 0, the next m1 rows belong to 4384 process 1, the next m2 rows belong to process 2 etc.. where 4385 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4386 values corresponding to [m x N] submatrix. 4387 4388 The columns are logically partitioned with the n0 columns belonging 4389 to 0th partition, the next n1 columns belonging to the next 4390 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4391 4392 The DIAGONAL portion of the local submatrix on any given processor 4393 is the submatrix corresponding to the rows and columns m,n 4394 corresponding to the given processor. i.e diagonal matrix on 4395 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4396 etc. The remaining portion of the local submatrix [m x (N-n)] 4397 constitute the OFF-DIAGONAL portion. The example below better 4398 illustrates this concept. 4399 4400 For a square global matrix we define each processor's diagonal portion 4401 to be its local rows and the corresponding columns (a square submatrix); 4402 each processor's off-diagonal portion encompasses the remainder of the 4403 local matrix (a rectangular submatrix). 
4404 4405 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4406 4407 When calling this routine with a single process communicator, a matrix of 4408 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4409 type of communicator, use the construction mechanism 4410 .vb 4411 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4412 .ve 4413 4414 $ MatCreate(...,&A); 4415 $ MatSetType(A,MATMPIAIJ); 4416 $ MatSetSizes(A, m,n,M,N); 4417 $ MatMPIAIJSetPreallocation(A,...); 4418 4419 By default, this format uses inodes (identical nodes) when possible. 4420 We search for consecutive rows with the same nonzero structure, thereby 4421 reusing matrix information to achieve increased efficiency. 4422 4423 Options Database Keys: 4424 + -mat_no_inode - Do not use inodes 4425 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4426 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4427 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4428 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4429 4430 Example usage: 4431 4432 Consider the following 8x8 matrix with 34 non-zero values, that is 4433 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4434 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4486 34 values. 4487 4488 When d_nnz, o_nnz parameters are specified, the storage is specified 4489 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4490 In the above case the values for d_nnz,o_nnz are 4491 .vb 4492 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4493 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4494 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4495 .ve 4496 Here the space allocated is sum of all the above values i.e 34, and 4497 hence pre-allocation is perfect. 4498 4499 Level: intermediate 4500 4501 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4502 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4503 @*/ 4504 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4505 { 4506 PetscMPIInt size; 4507 4508 PetscFunctionBegin; 4509 PetscCall(MatCreate(comm, A)); 4510 PetscCall(MatSetSizes(*A, m, n, M, N)); 4511 PetscCallMPI(MPI_Comm_size(comm, &size)); 4512 if (size > 1) { 4513 PetscCall(MatSetType(*A, MATMPIAIJ)); 4514 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4515 } else { 4516 PetscCall(MatSetType(*A, MATSEQAIJ)); 4517 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4518 } 4519 PetscFunctionReturn(0); 4520 } 4521 4522 /*@C 4523 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4524 4525 Not collective 4526 4527 Input Parameter: 4528 . A - The `MATMPIAIJ` matrix 4529 4530 Output Parameters: 4531 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4532 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4533 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4534 4535 Note: 4536 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4537 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4538 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4539 local column numbers to global column numbers in the original matrix. 4540 4541 Level: intermediate 4542 4543 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4544 @*/ 4545 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4546 { 4547 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4548 PetscBool flg; 4549 4550 PetscFunctionBegin; 4551 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4552 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4553 if (Ad) *Ad = a->A; 4554 if (Ao) *Ao = a->B; 4555 if (colmap) *colmap = a->garray; 4556 PetscFunctionReturn(0); 4557 } 4558 4559 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4560 { 4561 PetscInt m, N, i, rstart, nnz, Ii; 4562 PetscInt *indx; 4563 PetscScalar *values; 4564 MatType rootType; 4565 4566 PetscFunctionBegin; 4567 PetscCall(MatGetSize(inmat, &m, &N)); 4568 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4569 PetscInt *dnz, *onz, sum, bs, cbs; 4570 4571 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4572 /* Check sum(n) = N */ 4573 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4574 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != 
global columns %" PetscInt_FMT, sum, N); 4575 4576 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4577 rstart -= m; 4578 4579 MatPreallocateBegin(comm, m, n, dnz, onz); 4580 for (i = 0; i < m; i++) { 4581 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4582 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4583 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4584 } 4585 4586 PetscCall(MatCreate(comm, outmat)); 4587 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4588 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4589 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4590 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4591 PetscCall(MatSetType(*outmat, rootType)); 4592 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4593 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4594 MatPreallocateEnd(dnz, onz); 4595 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4596 } 4597 4598 /* numeric phase */ 4599 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4600 for (i = 0; i < m; i++) { 4601 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4602 Ii = i + rstart; 4603 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4604 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4605 } 4606 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4607 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4608 PetscFunctionReturn(0); 4609 } 4610 4611 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4612 { 4613 PetscMPIInt rank; 4614 PetscInt m, N, i, rstart, nnz; 4615 size_t len; 4616 const PetscInt *indx; 4617 PetscViewer out; 4618 char *name; 4619 Mat B; 4620 const PetscScalar *values; 4621 4622 PetscFunctionBegin; 4623 PetscCall(MatGetLocalSize(A, &m, NULL)); 4624 PetscCall(MatGetSize(A, NULL, &N)); 4625 /* Should this be the type of the diagonal block of A? 
*/ 4626 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4627 PetscCall(MatSetSizes(B, m, N, m, N)); 4628 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4629 PetscCall(MatSetType(B, MATSEQAIJ)); 4630 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4631 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4632 for (i = 0; i < m; i++) { 4633 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4634 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4635 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4636 } 4637 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4638 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4639 4640 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4641 PetscCall(PetscStrlen(outfile, &len)); 4642 PetscCall(PetscMalloc1(len + 6, &name)); 4643 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4644 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4645 PetscCall(PetscFree(name)); 4646 PetscCall(MatView(B, out)); 4647 PetscCall(PetscViewerDestroy(&out)); 4648 PetscCall(MatDestroy(&B)); 4649 PetscFunctionReturn(0); 4650 } 4651 4652 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4653 { 4654 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4655 4656 PetscFunctionBegin; 4657 if (!merge) PetscFunctionReturn(0); 4658 PetscCall(PetscFree(merge->id_r)); 4659 PetscCall(PetscFree(merge->len_s)); 4660 PetscCall(PetscFree(merge->len_r)); 4661 PetscCall(PetscFree(merge->bi)); 4662 PetscCall(PetscFree(merge->bj)); 4663 PetscCall(PetscFree(merge->buf_ri[0])); 4664 PetscCall(PetscFree(merge->buf_ri)); 4665 PetscCall(PetscFree(merge->buf_rj[0])); 4666 PetscCall(PetscFree(merge->buf_rj)); 4667 PetscCall(PetscFree(merge->coi)); 4668 PetscCall(PetscFree(merge->coj)); 4669 PetscCall(PetscFree(merge->owners_co)); 4670 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4671 PetscCall(PetscFree(merge)); 4672 PetscFunctionReturn(0); 4673 } 4674 4675 
#include <../src/mat/utils/freespace.h> 4676 #include <petscbt.h> 4677 4678 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4679 { 4680 MPI_Comm comm; 4681 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4682 PetscMPIInt size, rank, taga, *len_s; 4683 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4684 PetscInt proc, m; 4685 PetscInt **buf_ri, **buf_rj; 4686 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4687 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4688 MPI_Request *s_waits, *r_waits; 4689 MPI_Status *status; 4690 const MatScalar *aa, *a_a; 4691 MatScalar **abuf_r, *ba_i; 4692 Mat_Merge_SeqsToMPI *merge; 4693 PetscContainer container; 4694 4695 PetscFunctionBegin; 4696 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4697 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4698 4699 PetscCallMPI(MPI_Comm_size(comm, &size)); 4700 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4701 4702 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4703 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4704 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4705 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4706 aa = a_a; 4707 4708 bi = merge->bi; 4709 bj = merge->bj; 4710 buf_ri = merge->buf_ri; 4711 buf_rj = merge->buf_rj; 4712 4713 PetscCall(PetscMalloc1(size, &status)); 4714 owners = merge->rowmap->range; 4715 len_s = merge->len_s; 4716 4717 /* send and recv matrix values */ 4718 /*-----------------------------*/ 4719 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4720 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4721 4722 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4723 for (proc = 0, k = 0; proc < size; proc++) { 4724 if (!len_s[proc]) continue; 4725 i = owners[proc]; 4726 
PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4727 k++; 4728 } 4729 4730 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4731 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4732 PetscCall(PetscFree(status)); 4733 4734 PetscCall(PetscFree(s_waits)); 4735 PetscCall(PetscFree(r_waits)); 4736 4737 /* insert mat values of mpimat */ 4738 /*----------------------------*/ 4739 PetscCall(PetscMalloc1(N, &ba_i)); 4740 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4741 4742 for (k = 0; k < merge->nrecv; k++) { 4743 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4744 nrows = *(buf_ri_k[k]); 4745 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4746 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4747 } 4748 4749 /* set values of ba */ 4750 m = merge->rowmap->n; 4751 for (i = 0; i < m; i++) { 4752 arow = owners[rank] + i; 4753 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4754 bnzi = bi[i + 1] - bi[i]; 4755 PetscCall(PetscArrayzero(ba_i, bnzi)); 4756 4757 /* add local non-zero vals of this proc's seqmat into ba */ 4758 anzi = ai[arow + 1] - ai[arow]; 4759 aj = a->j + ai[arow]; 4760 aa = a_a + ai[arow]; 4761 nextaj = 0; 4762 for (j = 0; nextaj < anzi; j++) { 4763 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4764 ba_i[j] += aa[nextaj++]; 4765 } 4766 } 4767 4768 /* add received vals into ba */ 4769 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4770 /* i-th row */ 4771 if (i == *nextrow[k]) { 4772 anzi = *(nextai[k] + 1) - *nextai[k]; 4773 aj = buf_rj[k] + *(nextai[k]); 4774 aa = abuf_r[k] + *(nextai[k]); 4775 nextaj = 0; 4776 for (j = 0; nextaj < anzi; j++) { 4777 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4778 ba_i[j] += aa[nextaj++]; 4779 } 4780 } 4781 nextrow[k]++; 
4782 nextai[k]++; 4783 } 4784 } 4785 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4786 } 4787 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4788 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4789 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4790 4791 PetscCall(PetscFree(abuf_r[0])); 4792 PetscCall(PetscFree(abuf_r)); 4793 PetscCall(PetscFree(ba_i)); 4794 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4795 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4796 PetscFunctionReturn(0); 4797 } 4798 4799 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4800 { 4801 Mat B_mpi; 4802 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4803 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4804 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4805 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4806 PetscInt len, proc, *dnz, *onz, bs, cbs; 4807 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4808 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4809 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4810 MPI_Status *status; 4811 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4812 PetscBT lnkbt; 4813 Mat_Merge_SeqsToMPI *merge; 4814 PetscContainer container; 4815 4816 PetscFunctionBegin; 4817 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4818 4819 /* make sure it is a PETSc comm */ 4820 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4821 PetscCallMPI(MPI_Comm_size(comm, &size)); 4822 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4823 4824 PetscCall(PetscNew(&merge)); 4825 PetscCall(PetscMalloc1(size, &status)); 4826 4827 /* determine row ownership */ 4828 /*---------------------------------------------------------*/ 4829 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4830 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4831 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4832 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4833 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4834 PetscCall(PetscMalloc1(size, &len_si)); 4835 PetscCall(PetscMalloc1(size, &merge->len_s)); 4836 4837 m = merge->rowmap->n; 4838 owners = merge->rowmap->range; 4839 4840 /* determine the number of messages to send, their lengths */ 4841 /*---------------------------------------------------------*/ 4842 len_s = merge->len_s; 4843 4844 len = 0; /* length of buf_si[] */ 4845 merge->nsend = 0; 4846 for (proc = 0; proc < size; proc++) { 4847 len_si[proc] = 0; 4848 if (proc == rank) { 4849 len_s[proc] = 0; 4850 } else { 4851 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4852 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4853 } 4854 if (len_s[proc]) { 4855 merge->nsend++; 4856 nrows = 0; 4857 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4858 if (ai[i + 1] > ai[i]) nrows++; 4859 } 4860 len_si[proc] = 2 * (nrows + 1); 4861 len += len_si[proc]; 4862 } 4863 } 4864 4865 /* determine the number and length of messages to receive for ij-structure */ 4866 /*-------------------------------------------------------------------------*/ 4867 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4868 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4869 4870 /* post the Irecv of j-structure */ 4871 /*-------------------------------*/ 4872 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4873 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4874 4875 /* post the Isend of j-structure */ 4876 /*--------------------------------*/ 4877 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4878 4879 for (proc = 0, k = 0; proc < size; proc++) { 4880 if (!len_s[proc]) continue; 4881 i = owners[proc]; 4882 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4883 k++; 4884 } 4885 4886 /* receives and sends of j-structure are complete */ 4887 /*------------------------------------------------*/ 4888 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4889 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4890 4891 /* send and recv i-structure */ 4892 /*---------------------------*/ 4893 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4894 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4895 4896 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4897 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4898 for (proc = 0, k = 0; proc < size; proc++) { 4899 if (!len_s[proc]) continue; 4900 /* form outgoing message for i-structure: 4901 buf_si[0]: nrows to be sent 4902 [1:nrows]: row index (global) 4903 [nrows+1:2*nrows+1]: i-structure index 4904 */ 4905 /*-------------------------------------------*/ 4906 nrows = len_si[proc] / 2 - 1; 4907 buf_si_i = buf_si + nrows + 1; 4908 buf_si[0] = nrows; 4909 buf_si_i[0] = 0; 4910 nrows = 0; 4911 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4912 anzi = ai[i + 1] - ai[i]; 4913 if (anzi) { 4914 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4915 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4916 nrows++; 4917 } 4918 } 4919 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4920 k++; 4921 buf_si += len_si[proc]; 4922 } 4923 4924 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4925 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4926 4927 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4928 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); 4929 4930 PetscCall(PetscFree(len_si)); 4931 PetscCall(PetscFree(len_ri)); 4932 PetscCall(PetscFree(rj_waits)); 4933 PetscCall(PetscFree2(si_waits, sj_waits)); 4934 PetscCall(PetscFree(ri_waits)); 4935 PetscCall(PetscFree(buf_s)); 4936 PetscCall(PetscFree(status)); 4937 4938 /* compute a local seq matrix in each processor */ 4939 /*----------------------------------------------*/ 4940 /* allocate bi array and free space for accumulating nonzero column info */ 4941 PetscCall(PetscMalloc1(m + 1, &bi)); 4942 bi[0] = 0; 4943 4944 /* create and initialize a linked list */ 4945 nlnk = N + 1; 4946 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4947 4948 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4949 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4950 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4951 4952 current_space = free_space; 4953 4954 /* determine symbolic info for each local row */ 4955 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4956 4957 for (k = 0; k < merge->nrecv; k++) { 4958 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4959 nrows = *buf_ri_k[k]; 4960 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4961 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4962 } 4963 4964 MatPreallocateBegin(comm, m, n, dnz, onz); 4965 len = 0; 4966 for (i = 0; i < m; i++) { 4967 bnzi = 0; 4968 /* add local non-zero cols of this proc's seqmat into lnk */ 4969 arow = owners[rank] + i; 4970 anzi = ai[arow + 1] - ai[arow]; 4971 aj = a->j + ai[arow]; 4972 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4973 bnzi += nlnk; 4974 /* add received col data into lnk */ 4975 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4976 if (i == *nextrow[k]) { /* i-th row */ 4977 anzi = *(nextai[k] + 1) - *nextai[k]; 4978 aj = buf_rj[k] + 
*nextai[k]; 4979 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4980 bnzi += nlnk; 4981 nextrow[k]++; 4982 nextai[k]++; 4983 } 4984 } 4985 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4986 4987 /* if free space is not available, make more free space */ 4988 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4989 /* copy data into free space, then initialize lnk */ 4990 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4991 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4992 4993 current_space->array += bnzi; 4994 current_space->local_used += bnzi; 4995 current_space->local_remaining -= bnzi; 4996 4997 bi[i + 1] = bi[i] + bnzi; 4998 } 4999 5000 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5001 5002 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5003 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5004 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5005 5006 /* create symbolic parallel matrix B_mpi */ 5007 /*---------------------------------------*/ 5008 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5009 PetscCall(MatCreate(comm, &B_mpi)); 5010 if (n == PETSC_DECIDE) { 5011 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5012 } else { 5013 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5014 } 5015 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5016 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5017 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5018 MatPreallocateEnd(dnz, onz); 5019 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5020 5021 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5022 B_mpi->assembled = PETSC_FALSE; 5023 merge->bi = bi; 5024 merge->bj = bj; 5025 merge->buf_ri = buf_ri; 5026 merge->buf_rj = buf_rj; 5027 merge->coi = NULL; 5028 merge->coj = NULL; 5029 merge->owners_co = 
NULL; 5030 5031 PetscCall(PetscCommDestroy(&comm)); 5032 5033 /* attach the supporting struct to B_mpi for reuse */ 5034 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5035 PetscCall(PetscContainerSetPointer(container, merge)); 5036 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5037 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5038 PetscCall(PetscContainerDestroy(&container)); 5039 *mpimat = B_mpi; 5040 5041 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5042 PetscFunctionReturn(0); 5043 } 5044 5045 /*@C 5046 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5047 matrices from each processor 5048 5049 Collective 5050 5051 Input Parameters: 5052 + comm - the communicators the parallel matrix will live on 5053 . seqmat - the input sequential matrices 5054 . m - number of local rows (or `PETSC_DECIDE`) 5055 . n - number of local columns (or `PETSC_DECIDE`) 5056 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5057 5058 Output Parameter: 5059 . mpimat - the parallel matrix generated 5060 5061 Level: advanced 5062 5063 Note: 5064 The dimensions of the sequential matrix in each processor MUST be the same. 5065 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5066 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5067 @*/ 5068 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5069 { 5070 PetscMPIInt size; 5071 5072 PetscFunctionBegin; 5073 PetscCallMPI(MPI_Comm_size(comm, &size)); 5074 if (size == 1) { 5075 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5076 if (scall == MAT_INITIAL_MATRIX) { 5077 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5078 } else { 5079 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5080 } 5081 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5082 PetscFunctionReturn(0); 5083 } 5084 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5085 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5086 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5087 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5088 PetscFunctionReturn(0); 5089 } 5090 5091 /*@ 5092 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5093 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5094 with `MatGetSize()` 5095 5096 Not Collective 5097 5098 Input Parameters: 5099 + A - the matrix 5100 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5101 5102 Output Parameter: 5103 . A_loc - the local sequential matrix generated 5104 5105 Level: developer 5106 5107 Notes: 5108 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5109 5110 Destroy the matrix with `MatDestroy()` 5111 5112 .seealso: `MatMPIAIJGetLocalMat()` 5113 @*/ 5114 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5115 { 5116 PetscBool mpi; 5117 5118 PetscFunctionBegin; 5119 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5120 if (mpi) { 5121 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5122 } else { 5123 *A_loc = A; 5124 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5125 } 5126 PetscFunctionReturn(0); 5127 } 5128 5129 /*@ 5130 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5131 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5132 with `MatGetSize()` 5133 5134 Not Collective 5135 5136 Input Parameters: 5137 + A - the matrix 5138 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5139 5140 Output Parameter: 5141 . A_loc - the local sequential matrix generated 5142 5143 Level: developer 5144 5145 Notes: 5146 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5147 5148 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5149 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5150 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5151 modify the values of the returned A_loc. 

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray maps local off-diag columns to global columns */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and its subtypes (type name prefix match) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single process: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are advancing cursors over the value arrays; aav/bav keep the originals for the Restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result has all diag plus all off-diag entries of row i */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A with global column < cstart (kept sorted before the diagonal block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local column to global by cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column > local range) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: only refill the value array in the same traversal order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A, columns before the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, columns after the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into
a sequential matrix with 5262 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5263 5264 Not Collective 5265 5266 Input Parameters: 5267 + A - the matrix 5268 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5269 5270 Output Parameters: 5271 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5272 - A_loc - the local sequential matrix generated 5273 5274 Level: developer 5275 5276 Note: 5277 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5278 5279 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5280 @*/ 5281 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5282 { 5283 Mat Ao, Ad; 5284 const PetscInt *cmap; 5285 PetscMPIInt size; 5286 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5287 5288 PetscFunctionBegin; 5289 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5290 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5291 if (size == 1) { 5292 if (scall == MAT_INITIAL_MATRIX) { 5293 PetscCall(PetscObjectReference((PetscObject)Ad)); 5294 *A_loc = Ad; 5295 } else if (scall == MAT_REUSE_MATRIX) { 5296 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5297 } 5298 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5299 PetscFunctionReturn(0); 5300 } 5301 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5302 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5303 if (f) { 5304 PetscCall((*f)(A, scall, glob, A_loc)); 5305 } else { 5306 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5307 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5308 Mat_SeqAIJ 
*c; 5309 PetscInt *ai = a->i, *aj = a->j; 5310 PetscInt *bi = b->i, *bj = b->j; 5311 PetscInt *ci, *cj; 5312 const PetscScalar *aa, *ba; 5313 PetscScalar *ca; 5314 PetscInt i, j, am, dn, on; 5315 5316 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5317 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5318 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5319 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5320 if (scall == MAT_INITIAL_MATRIX) { 5321 PetscInt k; 5322 PetscCall(PetscMalloc1(1 + am, &ci)); 5323 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5324 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5325 ci[0] = 0; 5326 for (i = 0, k = 0; i < am; i++) { 5327 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5328 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5329 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5330 /* diagonal portion of A */ 5331 for (j = 0; j < ncols_d; j++, k++) { 5332 cj[k] = *aj++; 5333 ca[k] = *aa++; 5334 } 5335 /* off-diagonal portion of A */ 5336 for (j = 0; j < ncols_o; j++, k++) { 5337 cj[k] = dn + *bj++; 5338 ca[k] = *ba++; 5339 } 5340 } 5341 /* put together the new matrix */ 5342 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5343 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5344 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5345 c = (Mat_SeqAIJ *)(*A_loc)->data; 5346 c->free_a = PETSC_TRUE; 5347 c->free_ij = PETSC_TRUE; 5348 c->nonew = 0; 5349 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5350 } else if (scall == MAT_REUSE_MATRIX) { 5351 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5352 for (i = 0; i < am; i++) { 5353 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5354 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5355 /* diagonal portion of A */ 5356 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5357 /* off-diagonal portion of A */ 5358 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5359 } 5360 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5361 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5362 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5363 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5364 if (glob) { 5365 PetscInt cst, *gidx; 5366 5367 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5368 PetscCall(PetscMalloc1(dn + on, &gidx)); 5369 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5370 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5371 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5372 } 5373 } 5374 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5375 PetscFunctionReturn(0); 5376 } 5377 5378 /*@C 5379 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5380 5381 Not Collective 5382 5383 Input Parameters: 5384 + A - the matrix 5385 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5386 - row, col - index sets of rows and columns to extract (or NULL) 5387 5388 Output Parameter: 5389 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default rows: the locally owned row range */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default columns: the owned columns plus the nonzero off-diagonal columns, in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* garray is sorted, so off-diag columns below 'start' come first */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices for reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we have the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix
   * NOTE(review): pd->j is temporarily modified in place and restored below;
   * this is not thread-safe with respect to concurrent readers of P */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* map po->j back to local indices; every entry must be recovered */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof > 1 collapses MAIJ blocks onto one row */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A

  Collective on A

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* B's row layout must match A's column layout for the product-like extraction to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Note:
   This directly accesses information inside the VecScatter associated with the matrix-vector product
   for this matrix. This is not desirable..
5790 5791 Level: developer 5792 5793 */ 5794 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5795 { 5796 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5797 Mat_SeqAIJ *b_oth; 5798 VecScatter ctx; 5799 MPI_Comm comm; 5800 const PetscMPIInt *rprocs, *sprocs; 5801 const PetscInt *srow, *rstarts, *sstarts; 5802 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5803 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5804 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5805 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5806 PetscMPIInt size, tag, rank, nreqs; 5807 5808 PetscFunctionBegin; 5809 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5810 PetscCallMPI(MPI_Comm_size(comm, &size)); 5811 5812 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5813 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5814 } 5815 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5816 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5817 5818 if (size == 1) { 5819 startsj_s = NULL; 5820 bufa_ptr = NULL; 5821 *B_oth = NULL; 5822 PetscFunctionReturn(0); 5823 } 5824 5825 ctx = a->Mvctx; 5826 tag = ((PetscObject)ctx)->tag; 5827 5828 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5829 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5830 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5831 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5832 PetscCall(PetscMalloc1(nreqs, &reqs)); 5833 rwaits = reqs; 5834 swaits = reqs + nrecvs; 5835 5836 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5837 if (scall == MAT_INITIAL_MATRIX) { 5838 /* i-array */ 5839 /*---------*/ 5840 /* post receives */ 5841 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5842 for (i = 0; i < nrecvs; i++) { 5843 rowlen = rvalues + rstarts[i] * rbs; 5844 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5845 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5846 } 5847 5848 /* pack the outgoing message */ 5849 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5850 5851 sstartsj[0] = 0; 5852 rstartsj[0] = 0; 5853 len = 0; /* total length of j or a array to be sent */ 5854 if (nsends) { 5855 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5856 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5857 } 5858 for (i = 0; i < nsends; i++) { 5859 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5860 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5861 for (j = 0; j < nrows; j++) { 5862 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5863 for (l = 0; l < sbs; l++) { 5864 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5865 5866 rowlen[j * sbs + l] = ncols; 5867 5868 len += ncols; 5869 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5870 } 5871 k++; 5872 } 5873 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5874 5875 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5876 } 5877 /* recvs and sends of i-array are completed */ 5878 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5879 
PetscCall(PetscFree(svalues)); 5880 5881 /* allocate buffers for sending j and a arrays */ 5882 PetscCall(PetscMalloc1(len + 1, &bufj)); 5883 PetscCall(PetscMalloc1(len + 1, &bufa)); 5884 5885 /* create i-array of B_oth */ 5886 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5887 5888 b_othi[0] = 0; 5889 len = 0; /* total length of j or a array to be received */ 5890 k = 0; 5891 for (i = 0; i < nrecvs; i++) { 5892 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5893 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5894 for (j = 0; j < nrows; j++) { 5895 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5896 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5897 k++; 5898 } 5899 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5900 } 5901 PetscCall(PetscFree(rvalues)); 5902 5903 /* allocate space for j and a arrays of B_oth */ 5904 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5905 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5906 5907 /* j-array */ 5908 /*---------*/ 5909 /* post receives of j-array */ 5910 for (i = 0; i < nrecvs; i++) { 5911 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5912 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5913 } 5914 5915 /* pack the outgoing message j-array */ 5916 if (nsends) k = sstarts[0]; 5917 for (i = 0; i < nsends; i++) { 5918 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5919 bufJ = bufj + sstartsj[i]; 5920 for (j = 0; j < nrows; j++) { 5921 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5922 for (ll = 0; ll < sbs; ll++) { 5923 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5924 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5925 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5926 } 5927 } 5928 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5929 } 
5930 5931 /* recvs and sends of j-array are completed */ 5932 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5933 } else if (scall == MAT_REUSE_MATRIX) { 5934 sstartsj = *startsj_s; 5935 rstartsj = *startsj_r; 5936 bufa = *bufa_ptr; 5937 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5938 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5939 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5940 5941 /* a-array */ 5942 /*---------*/ 5943 /* post receives of a-array */ 5944 for (i = 0; i < nrecvs; i++) { 5945 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5946 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5947 } 5948 5949 /* pack the outgoing message a-array */ 5950 if (nsends) k = sstarts[0]; 5951 for (i = 0; i < nsends; i++) { 5952 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5953 bufA = bufa + sstartsj[i]; 5954 for (j = 0; j < nrows; j++) { 5955 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5956 for (ll = 0; ll < sbs; ll++) { 5957 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5958 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5959 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5960 } 5961 } 5962 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5963 } 5964 /* recvs and sends of a-array are completed */ 5965 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5966 PetscCall(PetscFree(reqs)); 5967 5968 if (scall == MAT_INITIAL_MATRIX) { 5969 /* put together the new matrix */ 5970 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5971 5972 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5973 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5974 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5975 b_oth->free_a = PETSC_TRUE; 5976 b_oth->free_ij = PETSC_TRUE; 5977 b_oth->nonew = 0; 5978 5979 PetscCall(PetscFree(bufj)); 5980 if (!startsj_s || !bufa_ptr) { 5981 PetscCall(PetscFree2(sstartsj, rstartsj)); 5982 PetscCall(PetscFree(bufa_ptr)); 5983 } else { 5984 *startsj_s = sstartsj; 5985 *startsj_r = rstartsj; 5986 *bufa_ptr = bufa; 5987 } 5988 } else if (scall == MAT_REUSE_MATRIX) { 5989 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5990 } 5991 5992 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5993 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5994 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5995 PetscFunctionReturn(0); 5996 } 5997 5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5999 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6001 #if defined(PETSC_HAVE_MKL_SPARSE) 6002 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6003 #endif 6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6005 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6006 #if defined(PETSC_HAVE_ELEMENTAL) 6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6008 #endif 6009 #if defined(PETSC_HAVE_SCALAPACK) 6010 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6011 #endif 6012 #if defined(PETSC_HAVE_HYPRE) 6013 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6014 #endif 6015 #if defined(PETSC_HAVE_CUDA) 6016 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat 
*); 6017 #endif 6018 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6019 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6020 #endif 6021 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6022 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6023 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6024 6025 /* 6026 Computes (B'*A')' since computing B*A directly is untenable 6027 6028 n p p 6029 [ ] [ ] [ ] 6030 m [ A ] * n [ B ] = m [ C ] 6031 [ ] [ ] [ ] 6032 6033 */ 6034 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6035 { 6036 Mat At, Bt, Ct; 6037 6038 PetscFunctionBegin; 6039 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6040 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6041 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6042 PetscCall(MatDestroy(&At)); 6043 PetscCall(MatDestroy(&Bt)); 6044 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6045 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6046 PetscCall(MatDestroy(&Ct)); 6047 PetscFunctionReturn(0); 6048 } 6049 6050 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6051 { 6052 PetscBool cisdense; 6053 6054 PetscFunctionBegin; 6055 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6056 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6057 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6058 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 6059 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6060 PetscCall(MatSetUp(C)); 6061 6062 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6063 PetscFunctionReturn(0); 6064 } 6065 6066 /* 
----------------------------------------------------------------*/ 6067 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6068 { 6069 Mat_Product *product = C->product; 6070 Mat A = product->A, B = product->B; 6071 6072 PetscFunctionBegin; 6073 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6074 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6075 6076 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6077 C->ops->productsymbolic = MatProductSymbolic_AB; 6078 PetscFunctionReturn(0); 6079 } 6080 6081 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6082 { 6083 Mat_Product *product = C->product; 6084 6085 PetscFunctionBegin; 6086 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6087 PetscFunctionReturn(0); 6088 } 6089 6090 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6091 6092 Input Parameters: 6093 6094 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6095 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6096 6097 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6098 6099 For Set1, j1[] contains column indices of the nonzeros. 6100 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6101 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6102 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6103 6104 Similar for Set2. 6105 6106 This routine merges the two sets of nonzeros row by row and removes repeats. 

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the sorted (possibly repeated) column indices of row r.
       b1/b2 always point at the FIRST occurrence of the current unique nonzero; jmap gives
       the repeat count, so one jump per unique nonzero skips all of its repeats. */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t] = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Skip the repeats of this Set1 nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Skip the repeats of this Set2 nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Next merged nonzero comes from Set1 only */
        j[t] = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Next merged nonzero comes from Set2 only */
        j[t] = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t] = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t] = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6196 6197 Atot: number of entries belonging to the diagonal block 6198 Annz: number of unique nonzeros belonging to the diagonal block. 6199 6200 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6201 6202 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6203 */ 6204 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6205 { 6206 PetscInt cstart, cend, rstart, rend, row, col; 6207 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6208 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6209 PetscCount k, m, p, q, r, s, mid; 6210 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6211 6212 PetscFunctionBegin; 6213 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6214 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6215 m = rend - rstart; 6216 6217 for (k = 0; k < n; k++) { 6218 if (i[k] >= 0) break; 6219 } /* Skip negative rows */ 6220 6221 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6222 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6223 */ 6224 while (k < n) { 6225 row = i[k]; 6226 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6227 for (s = k; s < n; s++) 6228 if (i[s] != row) break; 6229 for (p = k; p < s; p++) { 6230 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6231 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6232 } 6233 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6234 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6235 rowBegin[row - rstart] = k; 6236 rowMid[row - rstart] = mid; 6237 rowEnd[row - rstart] = s; 6238 6239 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6240 Atot += mid - k; 6241 Btot += s - mid; 6242 6243 /* Count unique nonzeros of this diag/offdiag row */ 6244 for (p = k; p < mid;) { 6245 col = j[p]; 6246 do { 6247 j[p] += PETSC_MAX_INT; 6248 p++; 6249 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6250 Annz++; 6251 } 6252 6253 for (p = mid; p < s;) { 6254 col = j[p]; 6255 do { 6256 p++; 6257 } while (p < s && j[p] == col); 6258 Bnnz++; 6259 } 6260 k = s; 6261 } 6262 6263 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6264 PetscCall(PetscMalloc1(Atot, &Aperm)); 6265 PetscCall(PetscMalloc1(Btot, &Bperm)); 6266 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6267 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6268 6269 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6270 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6271 for (r = 0; r < m; r++) { 6272 k = rowBegin[r]; 6273 mid = rowMid[r]; 6274 s = rowEnd[r]; 6275 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6276 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6277 Atot += mid - k; 6278 Btot += s - mid; 6279 6280 
/* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6281 for (p = k; p < mid;) { 6282 col = j[p]; 6283 q = p; 6284 do { 6285 p++; 6286 } while (p < mid && j[p] == col); 6287 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6288 Annz++; 6289 } 6290 6291 for (p = mid; p < s;) { 6292 col = j[p]; 6293 q = p; 6294 do { 6295 p++; 6296 } while (p < s && j[p] == col); 6297 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6298 Bnnz++; 6299 } 6300 } 6301 /* Output */ 6302 *Aperm_ = Aperm; 6303 *Annz_ = Annz; 6304 *Atot_ = Atot; 6305 *Ajmap_ = Ajmap; 6306 *Bperm_ = Bperm; 6307 *Bnnz_ = Bnnz; 6308 *Btot_ = Btot; 6309 *Bjmap_ = Bjmap; 6310 PetscFunctionReturn(0); 6311 } 6312 6313 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6314 6315 Input Parameters: 6316 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6317 nnz: number of unique nonzeros in the merged matrix 6318 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6319 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6320 6321 Output Parameter: (memory is allocated by the caller) 6322 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6323 6324 Example: 6325 nnz1 = 4 6326 nnz = 6 6327 imap = [1,3,4,5] 6328 jmap = [0,3,5,6,7] 6329 then, 6330 jmap_new = [0,0,3,3,5,6,7] 6331 */ 6332 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6333 { 6334 PetscCount k, p; 6335 6336 PetscFunctionBegin; 6337 jmap_new[0] = 0; 6338 p = nnz; /* p loops over jmap_new[] backwards */ 6339 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6340 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6341 } 6342 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6343 PetscFunctionReturn(0); 6344 } 6345 6346 PetscErrorCode 
MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6347 { 6348 MPI_Comm comm; 6349 PetscMPIInt rank, size; 6350 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6351 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6352 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6353 6354 PetscFunctionBegin; 6355 PetscCall(PetscFree(mpiaij->garray)); 6356 PetscCall(VecDestroy(&mpiaij->lvec)); 6357 #if defined(PETSC_USE_CTABLE) 6358 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6359 #else 6360 PetscCall(PetscFree(mpiaij->colmap)); 6361 #endif 6362 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6363 mat->assembled = PETSC_FALSE; 6364 mat->was_assembled = PETSC_FALSE; 6365 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6366 6367 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6368 PetscCallMPI(MPI_Comm_size(comm, &size)); 6369 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6370 PetscCall(PetscLayoutSetUp(mat->rmap)); 6371 PetscCall(PetscLayoutSetUp(mat->cmap)); 6372 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6373 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6374 PetscCall(MatGetLocalSize(mat, &m, &n)); 6375 PetscCall(MatGetSize(mat, &M, &N)); 6376 6377 /* ---------------------------------------------------------------------------*/ 6378 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6379 /* entries come first, then local rows, then remote rows. 
*/ 6380 /* ---------------------------------------------------------------------------*/ 6381 PetscCount n1 = coo_n, *perm1; 6382 PetscInt *i1 = coo_i, *j1 = coo_j; 6383 6384 PetscCall(PetscMalloc1(n1, &perm1)); 6385 for (k = 0; k < n1; k++) perm1[k] = k; 6386 6387 /* Manipulate indices so that entries with negative row or col indices will have smallest 6388 row indices, local entries will have greater but negative row indices, and remote entries 6389 will have positive row indices. 6390 */ 6391 for (k = 0; k < n1; k++) { 6392 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6393 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6394 else { 6395 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6396 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6397 } 6398 } 6399 6400 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6401 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6402 for (k = 0; k < n1; k++) { 6403 if (i1[k] > PETSC_MIN_INT) break; 6404 } /* Advance k to the first entry we need to take care of */ 6405 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6406 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6407 6408 /* ---------------------------------------------------------------------------*/ 6409 /* Split local rows into diag/offdiag portions */ 6410 /* ---------------------------------------------------------------------------*/ 6411 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6412 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6413 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6414 
6415 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6416 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6417 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6418 6419 /* ---------------------------------------------------------------------------*/ 6420 /* Send remote rows to their owner */ 6421 /* ---------------------------------------------------------------------------*/ 6422 /* Find which rows should be sent to which remote ranks*/ 6423 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6424 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6425 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6426 const PetscInt *ranges; 6427 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6428 6429 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6430 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6431 for (k = rem; k < n1;) { 6432 PetscMPIInt owner; 6433 PetscInt firstRow, lastRow; 6434 6435 /* Locate a row range */ 6436 firstRow = i1[k]; /* first row of this owner */ 6437 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6438 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6439 6440 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6441 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6442 6443 /* All entries in [k,p) belong to this remote owner */ 6444 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6445 PetscMPIInt *sendto2; 6446 PetscInt *nentries2; 6447 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6448 6449 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6450 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6451 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6452 PetscCall(PetscFree2(sendto, nentries2)); 6453 sendto = sendto2; 6454 nentries = nentries2; 6455 maxNsend = maxNsend2; 6456 } 6457 sendto[nsend] = owner; 6458 nentries[nsend] = p - k; 6459 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6460 nsend++; 6461 k = p; 6462 } 6463 6464 /* Build 1st SF to know offsets on remote to send data */ 6465 PetscSF sf1; 6466 PetscInt nroots = 1, nroots2 = 0; 6467 PetscInt nleaves = nsend, nleaves2 = 0; 6468 PetscInt *offsets; 6469 PetscSFNode *iremote; 6470 6471 PetscCall(PetscSFCreate(comm, &sf1)); 6472 PetscCall(PetscMalloc1(nsend, &iremote)); 6473 PetscCall(PetscMalloc1(nsend, &offsets)); 6474 for (k = 0; k < nsend; k++) { 6475 iremote[k].rank = sendto[k]; 6476 iremote[k].index = 0; 6477 nleaves2 += nentries[k]; 6478 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6479 } 6480 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6481 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6482 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6483 PetscCall(PetscSFDestroy(&sf1)); 6484 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6485 6486 /* Build 2nd SF to send remote COOs to their owner */ 6487 PetscSF sf2; 6488 nroots = nroots2; 6489 nleaves = nleaves2; 6490 PetscCall(PetscSFCreate(comm, &sf2)); 6491 
PetscCall(PetscSFSetFromOptions(sf2)); 6492 PetscCall(PetscMalloc1(nleaves, &iremote)); 6493 p = 0; 6494 for (k = 0; k < nsend; k++) { 6495 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6496 for (q = 0; q < nentries[k]; q++, p++) { 6497 iremote[p].rank = sendto[k]; 6498 iremote[p].index = offsets[k] + q; 6499 } 6500 } 6501 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6502 6503 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6504 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6505 6506 /* Send the remote COOs to their owner */ 6507 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6508 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6509 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6510 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6511 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6512 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6513 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6514 6515 PetscCall(PetscFree(offsets)); 6516 PetscCall(PetscFree2(sendto, nentries)); 6517 6518 /* ---------------------------------------------------------------*/ 6519 /* Sort received COOs by row along with the permutation array */ 6520 /* ---------------------------------------------------------------*/ 6521 for (k = 0; k < n2; k++) perm2[k] = k; 6522 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6523 6524 /* ---------------------------------------------------------------*/ 6525 /* 
Split received COOs into diag/offdiag portions */ 6526 /* ---------------------------------------------------------------*/ 6527 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6528 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6529 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6530 6531 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6532 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6533 6534 /* --------------------------------------------------------------------------*/ 6535 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6536 /* --------------------------------------------------------------------------*/ 6537 PetscInt *Ai, *Bi; 6538 PetscInt *Aj, *Bj; 6539 6540 PetscCall(PetscMalloc1(m + 1, &Ai)); 6541 PetscCall(PetscMalloc1(m + 1, &Bi)); 6542 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6543 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6544 6545 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6546 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6547 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6548 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6549 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6550 6551 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6552 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6553 6554 /* --------------------------------------------------------------------------*/ 6555 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6556 /* expect nonzeros in A/B most likely have local contributing entries */ 6557 /* --------------------------------------------------------------------------*/ 6558 PetscInt Annz = Ai[m]; 6559 PetscInt Bnnz = Bi[m]; 6560 
PetscCount *Ajmap1_new, *Bjmap1_new; 6561 6562 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6563 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6564 6565 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6566 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6567 6568 PetscCall(PetscFree(Aimap1)); 6569 PetscCall(PetscFree(Ajmap1)); 6570 PetscCall(PetscFree(Bimap1)); 6571 PetscCall(PetscFree(Bjmap1)); 6572 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6573 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6574 PetscCall(PetscFree(perm1)); 6575 PetscCall(PetscFree3(i2, j2, perm2)); 6576 6577 Ajmap1 = Ajmap1_new; 6578 Bjmap1 = Bjmap1_new; 6579 6580 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6581 if (Annz < Annz1 + Annz2) { 6582 PetscInt *Aj_new; 6583 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6584 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6585 PetscCall(PetscFree(Aj)); 6586 Aj = Aj_new; 6587 } 6588 6589 if (Bnnz < Bnnz1 + Bnnz2) { 6590 PetscInt *Bj_new; 6591 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6592 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6593 PetscCall(PetscFree(Bj)); 6594 Bj = Bj_new; 6595 } 6596 6597 /* --------------------------------------------------------------------------------*/ 6598 /* Create new submatrices for on-process and off-process coupling */ 6599 /* --------------------------------------------------------------------------------*/ 6600 PetscScalar *Aa, *Ba; 6601 MatType rtype; 6602 Mat_SeqAIJ *a, *b; 6603 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6604 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6605 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6606 if (cstart) { 6607 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6608 } 6609 PetscCall(MatDestroy(&mpiaij->A)); 6610 PetscCall(MatDestroy(&mpiaij->B)); 6611 PetscCall(MatGetRootType_Private(mat, &rtype)); 6612 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6613 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6614 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6615 6616 a = (Mat_SeqAIJ *)mpiaij->A->data; 6617 b = (Mat_SeqAIJ *)mpiaij->B->data; 6618 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6619 a->free_a = b->free_a = PETSC_TRUE; 6620 a->free_ij = b->free_ij = PETSC_TRUE; 6621 6622 /* conversion must happen AFTER multiply setup */ 6623 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6624 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6625 PetscCall(VecDestroy(&mpiaij->lvec)); 6626 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6627 6628 mpiaij->coo_n = coo_n; 6629 mpiaij->coo_sf = sf2; 6630 mpiaij->sendlen = nleaves; 6631 mpiaij->recvlen = nroots; 6632 6633 mpiaij->Annz = Annz; 6634 mpiaij->Bnnz = Bnnz; 6635 6636 mpiaij->Annz2 = Annz2; 6637 mpiaij->Bnnz2 = Bnnz2; 6638 6639 mpiaij->Atot1 = Atot1; 6640 mpiaij->Atot2 = Atot2; 6641 mpiaij->Btot1 = Btot1; 6642 mpiaij->Btot2 = Btot2; 6643 6644 mpiaij->Ajmap1 = Ajmap1; 6645 mpiaij->Aperm1 = Aperm1; 6646 6647 mpiaij->Bjmap1 = Bjmap1; 6648 mpiaij->Bperm1 = Bperm1; 6649 6650 mpiaij->Aimap2 = Aimap2; 6651 mpiaij->Ajmap2 = Ajmap2; 6652 mpiaij->Aperm2 = Aperm2; 6653 6654 mpiaij->Bimap2 = Bimap2; 6655 mpiaij->Bjmap2 = Bjmap2; 6656 mpiaij->Bperm2 = Bperm2; 6657 6658 mpiaij->Cperm1 = Cperm1; 6659 6660 /* Allocate in preallocation. 
     If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* Insert or add the COO values v[] (ordered as in the user's MatSetPreallocationCOO() call) into the
   diagonal (A) and off-diagonal (B) blocks, using the maps built by MatSetPreallocationCOO_MPIAIJ():
   - Cperm1[]          : permutation packing the locally-supplied remote entries of v[] into sendbuf
   - Ajmap1/Aperm1 etc.: for each nonzero, the range of local v[] entries that sum into it
   - Aimap2/Ajmap2/Aperm2 etc.: same for entries received from other ranks (in recvbuf)
   Communication of remote entries is overlapped with the local accumulation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    /* INSERT_VALUES discards the previous matrix entry; ADD_VALUES accumulates onto it */
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; remote contributions are always added on top of the
     (already initialized) local result, regardless of imode */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
    `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Type constructor registered for "mpiaij": installs the MPIAIJ data structure, the function table,
   and the composed-function hooks (preallocation, conversions, products, COO assembly) on B.
   No preallocation happens here; that is deferred to MatMPIAIJSetPreallocation() and friends. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Composed functions: dispatched by name via PetscObjectQueryFunction(); the #ifdef'ed
     conversions are only available when PETSc was configured with the corresponding package */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.
j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6829 . a - matrix values 6830 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6831 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6832 - oa - matrix values 6833 6834 Output Parameter: 6835 . mat - the matrix 6836 6837 Level: advanced 6838 6839 Notes: 6840 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6841 must free the arrays once the matrix has been destroyed and not before. 6842 6843 The i and j indices are 0 based 6844 6845 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6846 6847 This sets local rows and cannot be used to set off-processor values. 6848 6849 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6850 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6851 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6852 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6853 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6854 communication if it is known that only local entries will be set. 
6855 6856 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6857 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6858 @*/ 6859 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6860 { 6861 Mat_MPIAIJ *maij; 6862 6863 PetscFunctionBegin; 6864 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6865 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6866 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6867 PetscCall(MatCreate(comm, mat)); 6868 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6869 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6870 maij = (Mat_MPIAIJ *)(*mat)->data; 6871 6872 (*mat)->preallocated = PETSC_TRUE; 6873 6874 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6875 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6876 6877 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6878 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6879 6880 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6881 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6882 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6883 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6884 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6885 PetscFunctionReturn(0); 6886 } 6887 6888 typedef struct { 6889 Mat *mp; /* intermediate products */ 6890 PetscBool *mptmp; /* is the intermediate product temporary ? 
                     */
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the product context above; note own[]/off[] each own one flat index buffer
   anchored at own[0]/off[0], hence the two-step frees below */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with the SF's memory type (may be device memory) */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[].
   Dispatches to a type-specific implementation (composed as "MatSeqAIJCopySubArray_C",
   e.g. for device subclasses) when available, otherwise gathers on the host. */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend product: recompute the intermediate products, gather their values
   into the COO buffers (coo_v on-process, coo_w off-process), scatter off-process contributions,
   and insert everything into C via MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* symbolic-phase values are only reusable for the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* number of off-process entries of mp[i] (pointer difference into the flat index buffer) */

    if (mmdata->mptmp[i]) continue; /* temporaries are consumed by later products, not by C */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries: every nonzero of mp[i] is on-process, copy them all */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7032 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7033 /* a base offset; type-2: sparse with a local to global map table */ 7034 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7035 7036 MatProductType ptype; 7037 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk; 7038 PetscMPIInt size; 7039 7040 PetscFunctionBegin; 7041 MatCheckProduct(C, 1); 7042 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7043 ptype = product->type; 7044 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7045 ptype = MATPRODUCT_AB; 7046 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7047 } 7048 switch (ptype) { 7049 case MATPRODUCT_AB: 7050 A = product->A; 7051 P = product->B; 7052 m = A->rmap->n; 7053 n = P->cmap->n; 7054 M = A->rmap->N; 7055 N = P->cmap->N; 7056 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7057 break; 7058 case MATPRODUCT_AtB: 7059 P = product->A; 7060 A = product->B; 7061 m = P->cmap->n; 7062 n = A->cmap->n; 7063 M = P->cmap->N; 7064 N = A->cmap->N; 7065 hasoffproc = PETSC_TRUE; 7066 break; 7067 case MATPRODUCT_PtAP: 7068 A = product->A; 7069 P = product->B; 7070 m = P->cmap->n; 7071 n = P->cmap->n; 7072 M = P->cmap->N; 7073 N = P->cmap->N; 7074 hasoffproc = PETSC_TRUE; 7075 break; 7076 default: 7077 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7078 } 7079 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7080 if (size == 1) hasoffproc = PETSC_FALSE; 7081 7082 /* defaults */ 7083 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7084 mp[i] = NULL; 7085 mptmp[i] = PETSC_FALSE; 7086 rmapt[i] = -1; 7087 cmapt[i] = -1; 7088 rmapa[i] = NULL; 7089 cmapa[i] = NULL; 7090 } 7091 7092 /* customization */ 7093 
PetscCall(PetscNew(&mmdata)); 7094 mmdata->reusesym = product->api_user; 7095 if (ptype == MATPRODUCT_AB) { 7096 if (product->api_user) { 7097 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7098 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7099 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7100 PetscOptionsEnd(); 7101 } else { 7102 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7103 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7104 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7105 PetscOptionsEnd(); 7106 } 7107 } else if (ptype == MATPRODUCT_PtAP) { 7108 if (product->api_user) { 7109 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7110 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7111 PetscOptionsEnd(); 7112 } else { 7113 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7114 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7115 PetscOptionsEnd(); 7116 } 7117 } 7118 a = (Mat_MPIAIJ *)A->data; 7119 p = (Mat_MPIAIJ *)P->data; 7120 PetscCall(MatSetSizes(C, m, n, M, N)); 7121 PetscCall(PetscLayoutSetUp(C->rmap)); 7122 PetscCall(PetscLayoutSetUp(C->cmap)); 7123 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7124 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7125 7126 cp = 0; 7127 switch (ptype) { 7128 case MATPRODUCT_AB: /* A * P */ 7129 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7130 7131 /* A_diag * P_local (merged or not) */ 7132 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7133 /* P is product->B */ 7134 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7135 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7136 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7137 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7138 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7139 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7140 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7141 mp[cp]->product->api_user = product->api_user; 7142 PetscCall(MatProductSetFromOptions(mp[cp])); 7143 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7144 PetscCall(ISGetIndices(glob, &globidx)); 7145 rmapt[cp] = 1; 7146 cmapt[cp] = 2; 7147 cmapa[cp] = globidx; 7148 mptmp[cp] = PETSC_FALSE; 7149 cp++; 7150 } else { /* A_diag * P_diag and A_diag * P_off */ 7151 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7152 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7153 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7154 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7155 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7156 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7157 mp[cp]->product->api_user = product->api_user; 7158 PetscCall(MatProductSetFromOptions(mp[cp])); 7159 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7160 rmapt[cp] = 1; 7161 cmapt[cp] = 1; 7162 mptmp[cp] = PETSC_FALSE; 7163 cp++; 7164 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7165 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7166 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7167 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7168 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7169 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7170 mp[cp]->product->api_user = product->api_user; 7171 PetscCall(MatProductSetFromOptions(mp[cp])); 7172 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7173 rmapt[cp] = 1; 7174 cmapt[cp] = 2; 7175 cmapa[cp] = p->garray; 7176 mptmp[cp] = PETSC_FALSE; 7177 cp++; 7178 } 7179 7180 /* A_off * P_other */ 7181 if (mmdata->P_oth) { 7182 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7183 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7184 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7185 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7186 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7187 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7188 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7189 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7190 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7191 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7192 mp[cp]->product->api_user = product->api_user; 7193 PetscCall(MatProductSetFromOptions(mp[cp])); 7194 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7195 rmapt[cp] = 1; 7196 cmapt[cp] = 2; 7197 cmapa[cp] = P_oth_idx; 7198 mptmp[cp] = PETSC_FALSE; 7199 cp++; 7200 } 7201 break; 7202 7203 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7204 /* A is product->B */ 7205 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7206 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7207 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7208 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7209 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7210 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7211 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7212 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7213 mp[cp]->product->api_user = product->api_user; 7214 PetscCall(MatProductSetFromOptions(mp[cp])); 7215 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7216 PetscCall(ISGetIndices(glob, &globidx)); 7217 rmapt[cp] = 2; 7218 rmapa[cp] = globidx; 7219 cmapt[cp] = 2; 7220 cmapa[cp] = globidx; 7221 mptmp[cp] = PETSC_FALSE; 7222 cp++; 7223 } else { 7224 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7225 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7226 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7227 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7228 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7229 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7230 mp[cp]->product->api_user = product->api_user; 7231 PetscCall(MatProductSetFromOptions(mp[cp])); 7232 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7233 PetscCall(ISGetIndices(glob, &globidx)); 7234 rmapt[cp] = 1; 7235 cmapt[cp] = 2; 7236 cmapa[cp] = globidx; 7237 mptmp[cp] = PETSC_FALSE; 7238 cp++; 7239 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7240 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7241 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7242 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7243 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7244 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7245 mp[cp]->product->api_user = product->api_user; 7246 PetscCall(MatProductSetFromOptions(mp[cp])); 7247 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7248 rmapt[cp] = 2; 7249 rmapa[cp] = p->garray; 
7250 cmapt[cp] = 2; 7251 cmapa[cp] = globidx; 7252 mptmp[cp] = PETSC_FALSE; 7253 cp++; 7254 } 7255 break; 7256 case MATPRODUCT_PtAP: 7257 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7258 /* P is product->B */ 7259 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7260 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7261 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7262 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7263 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7264 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7265 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7266 mp[cp]->product->api_user = product->api_user; 7267 PetscCall(MatProductSetFromOptions(mp[cp])); 7268 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7269 PetscCall(ISGetIndices(glob, &globidx)); 7270 rmapt[cp] = 2; 7271 rmapa[cp] = globidx; 7272 cmapt[cp] = 2; 7273 cmapa[cp] = globidx; 7274 mptmp[cp] = PETSC_FALSE; 7275 cp++; 7276 if (mmdata->P_oth) { 7277 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7278 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7279 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7280 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7281 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7282 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7283 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7284 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7285 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7286 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7287 mp[cp]->product->api_user = product->api_user; 7288 PetscCall(MatProductSetFromOptions(mp[cp])); 7289 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7290 
mptmp[cp] = PETSC_TRUE; 7291 cp++; 7292 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7293 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7294 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7295 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7296 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7297 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7298 mp[cp]->product->api_user = product->api_user; 7299 PetscCall(MatProductSetFromOptions(mp[cp])); 7300 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7301 rmapt[cp] = 2; 7302 rmapa[cp] = globidx; 7303 cmapt[cp] = 2; 7304 cmapa[cp] = P_oth_idx; 7305 mptmp[cp] = PETSC_FALSE; 7306 cp++; 7307 } 7308 break; 7309 default: 7310 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7311 } 7312 /* sanity check */ 7313 if (size > 1) 7314 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7315 7316 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7317 for (i = 0; i < cp; i++) { 7318 mmdata->mp[i] = mp[i]; 7319 mmdata->mptmp[i] = mptmp[i]; 7320 } 7321 mmdata->cp = cp; 7322 C->product->data = mmdata; 7323 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7324 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7325 7326 /* memory type */ 7327 mmdata->mtype = PETSC_MEMTYPE_HOST; 7328 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7329 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7330 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7331 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7332 7333 /* prepare coo coordinates for values insertion */ 7334 7335 /* count total nonzeros of those intermediate seqaij Mats 7336 ncoo_d: # of nonzeros of matrices that 
do not have offproc entries 7337 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7338 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7339 */ 7340 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7341 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7342 if (mptmp[cp]) continue; 7343 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7344 const PetscInt *rmap = rmapa[cp]; 7345 const PetscInt mr = mp[cp]->rmap->n; 7346 const PetscInt rs = C->rmap->rstart; 7347 const PetscInt re = C->rmap->rend; 7348 const PetscInt *ii = mm->i; 7349 for (i = 0; i < mr; i++) { 7350 const PetscInt gr = rmap[i]; 7351 const PetscInt nz = ii[i + 1] - ii[i]; 7352 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7353 else ncoo_oown += nz; /* this row is local */ 7354 } 7355 } else ncoo_d += mm->nz; 7356 } 7357 7358 /* 7359 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7360 7361 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7362 7363 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7364 7365 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7366 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7367 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7368 7369 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7370 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7371 */ 7372 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7373 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7374 7375 /* gather (i,j) of nonzeros inserted by remote procs */ 7376 if (hasoffproc) { 7377 PetscSF msf; 7378 PetscInt ncoo2, *coo_i2, *coo_j2; 7379 7380 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7381 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7382 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7383 7384 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7385 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7386 PetscInt *idxoff = mmdata->off[cp]; 7387 PetscInt *idxown = mmdata->own[cp]; 7388 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7389 const PetscInt *rmap = rmapa[cp]; 7390 const PetscInt *cmap = cmapa[cp]; 7391 const PetscInt *ii = mm->i; 7392 PetscInt *coi = coo_i + ncoo_o; 7393 PetscInt *coj = coo_j + ncoo_o; 7394 const PetscInt mr = mp[cp]->rmap->n; 7395 const PetscInt rs = C->rmap->rstart; 7396 const PetscInt re = C->rmap->rend; 7397 const PetscInt cs = C->cmap->rstart; 7398 for (i = 0; i < mr; i++) { 7399 const PetscInt *jj = mm->j + ii[i]; 7400 const PetscInt gr = rmap[i]; 7401 const PetscInt nz = ii[i + 1] - ii[i]; 7402 if (gr < rs || gr >= re) { /* this is an offproc row */ 7403 for (j = ii[i]; j < ii[i + 1]; j++) { 7404 *coi++ = gr; 7405 *idxoff++ = j; 7406 } 7407 if (!cmapt[cp]) { /* already global */ 7408 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7409 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7410 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7411 } else { /* offdiag */ 7412 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7413 } 7414 ncoo_o += nz; 7415 } else { /* this is a local row */ 7416 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7417 } 7418 } 7419 } 7420 mmdata->off[cp + 1] = idxoff; 7421 mmdata->own[cp + 1] = idxown; 7422 } 7423 7424 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7425 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7426 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7427 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7428 ncoo = ncoo_d + ncoo_oown + ncoo2; 7429 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7430 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7431 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7432 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7433 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7434 PetscCall(PetscFree2(coo_i, coo_j)); 7435 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7436 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7437 coo_i = coo_i2; 7438 coo_j = coo_j2; 7439 } else { /* no offproc values insertion */ 7440 ncoo = ncoo_d; 7441 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7442 7443 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7444 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7445 PetscCall(PetscSFSetUp(mmdata->sf)); 7446 } 7447 mmdata->hasoffproc = hasoffproc; 7448 7449 /* gather (i,j) of nonzeros inserted locally */ 7450 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7451 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7452 PetscInt *coi = coo_i + ncoo_d; 7453 PetscInt *coj = coo_j + ncoo_d; 7454 const PetscInt *jj = mm->j; 7455 const PetscInt *ii = mm->i; 7456 const PetscInt *cmap = cmapa[cp]; 7457 const PetscInt *rmap = rmapa[cp]; 7458 const PetscInt mr = mp[cp]->rmap->n; 7459 const PetscInt rs = C->rmap->rstart; 7460 const 
PetscInt re = C->rmap->rend; 7461 const PetscInt cs = C->cmap->rstart; 7462 7463 if (mptmp[cp]) continue; 7464 if (rmapt[cp] == 1) { /* consecutive rows */ 7465 /* fill coo_i */ 7466 for (i = 0; i < mr; i++) { 7467 const PetscInt gr = i + rs; 7468 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7469 } 7470 /* fill coo_j */ 7471 if (!cmapt[cp]) { /* type-0, already global */ 7472 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7473 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7474 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7475 } else { /* type-2, local to global for sparse columns */ 7476 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7477 } 7478 ncoo_d += mm->nz; 7479 } else if (rmapt[cp] == 2) { /* sparse rows */ 7480 for (i = 0; i < mr; i++) { 7481 const PetscInt *jj = mm->j + ii[i]; 7482 const PetscInt gr = rmap[i]; 7483 const PetscInt nz = ii[i + 1] - ii[i]; 7484 if (gr >= rs && gr < re) { /* local rows */ 7485 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7486 if (!cmapt[cp]) { /* type-0, already global */ 7487 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7488 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7489 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7490 } else { /* type-2, local to global for sparse columns */ 7491 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7492 } 7493 ncoo_d += nz; 7494 } 7495 } 7496 } 7497 } 7498 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7499 PetscCall(ISDestroy(&glob)); 7500 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7501 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7502 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7503 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7504 7505 /* preallocate with COO data */ 7506 PetscCall(MatSetPreallocationCOO(C, ncoo, 
coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}

/* Choose the backend (device-capable) symbolic routine for AB, AtB and PtAP products of
   MPIAIJ-like matrices. Options named -*_backend_cpu let the user force the plain MPIAIJ
   CPU implementation instead; without device support this backend is always selected. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE; /* no device support compiled in: always take this backend */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only use the backend when A and B have the same type and neither has been bound to the CPU */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) { /* user went through MatMatMult(): legacy option names */
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else { /* MatProduct API option names */
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt    = 0;
    cc[cnt] = idx[0] / bs;
    /* the `<` test only records the first index of each new block run; this relies on
       idx[] being in ascending order (MatGetRow column order) — NOTE(review): confirm */
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1; /* empty row => cnt stays -1 => *n = 0 */
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

7610 ncollapsed - the number of block indices 7611 collapsed - the block indices (must be large enough to contain the indices) 7612 */ 7613 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7614 { 7615 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7616 7617 PetscFunctionBegin; 7618 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7619 for (i = start + 1; i < start + bs; i++) { 7620 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7621 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7622 cprevtmp = cprev; 7623 cprev = merged; 7624 merged = cprevtmp; 7625 } 7626 *ncollapsed = nprev; 7627 if (collapsed) *collapsed = cprev; 7628 PetscFunctionReturn(0); 7629 } 7630 7631 /* 7632 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7633 */ 7634 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7635 { 7636 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7637 Mat tGmat; 7638 MPI_Comm comm; 7639 const PetscScalar *vals; 7640 const PetscInt *idx; 7641 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7642 MatScalar *AA; // this is checked in graph 7643 PetscBool isseqaij; 7644 Mat a, b, c; 7645 MatType jtype; 7646 7647 PetscFunctionBegin; 7648 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7649 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7650 PetscCall(MatGetType(Gmat, &jtype)); 7651 PetscCall(MatCreate(comm, &tGmat)); 7652 PetscCall(MatSetType(tGmat, jtype)); 7653 7654 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7655 Also, if the matrix is symmetric, can we skip this 7656 operation? It can be very expensive on large matrices. 
*/ 7657 7658 // global sizes 7659 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7660 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7661 nloc = Iend - Istart; 7662 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7663 if (isseqaij) { 7664 a = Gmat; 7665 b = NULL; 7666 } else { 7667 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7668 a = d->A; 7669 b = d->B; 7670 garray = d->garray; 7671 } 7672 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7673 for (PetscInt row = 0; row < nloc; row++) { 7674 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7675 d_nnz[row] = ncols; 7676 if (ncols > maxcols) maxcols = ncols; 7677 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7678 } 7679 if (b) { 7680 for (PetscInt row = 0; row < nloc; row++) { 7681 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7682 o_nnz[row] = ncols; 7683 if (ncols > maxcols) maxcols = ncols; 7684 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7685 } 7686 } 7687 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7688 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7689 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7690 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7691 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7692 PetscCall(PetscFree2(d_nnz, o_nnz)); 7693 // 7694 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7695 nnz0 = nnz1 = 0; 7696 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7697 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7698 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7699 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7700 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7701 if (PetscRealPart(sv) > vfilter) { 7702 nnz1++; 7703 PetscInt cid = idx[jj] + Istart; //diag 7704 if (c != a) cid = garray[idx[jj]]; 7705 AA[ncol_row] = vals[jj]; 7706 AJ[ncol_row] = cid; 7707 ncol_row++; 7708 } 7709 } 7710 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7711 
PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7712 } 7713 } 7714 PetscCall(PetscFree2(AA, AJ)); 7715 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 7716 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7717 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7718 7719 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7720 7721 *filteredG = tGmat; 7722 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7723 PetscFunctionReturn(0); 7724 } 7725 7726 /* 7727 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7728 7729 Input Parameter: 7730 . Amat - matrix 7731 - symmetrize - make the result symmetric 7732 + scale - scale with diagonal 7733 7734 Output Parameter: 7735 . 
a_Gmat - output scalar graph >= 0 7736 7737 */ 7738 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7739 { 7740 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7741 MPI_Comm comm; 7742 Mat Gmat; 7743 PetscBool ismpiaij, isseqaij; 7744 Mat a, b, c; 7745 MatType jtype; 7746 7747 PetscFunctionBegin; 7748 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7749 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7750 PetscCall(MatGetSize(Amat, &MM, &NN)); 7751 PetscCall(MatGetBlockSize(Amat, &bs)); 7752 nloc = (Iend - Istart) / bs; 7753 7754 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7755 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7756 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7757 7758 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7759 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7760 implementation */ 7761 if (bs > 1) { 7762 PetscCall(MatGetType(Amat, &jtype)); 7763 PetscCall(MatCreate(comm, &Gmat)); 7764 PetscCall(MatSetType(Gmat, jtype)); 7765 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7766 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7767 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7768 PetscInt *d_nnz, *o_nnz; 7769 MatScalar *aa, val, AA[4096]; 7770 PetscInt *aj, *ai, AJ[4096], nc; 7771 if (isseqaij) { 7772 a = Amat; 7773 b = NULL; 7774 } else { 7775 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7776 a = d->A; 7777 b = d->B; 7778 } 7779 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7780 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7781 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7782 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz, nmax = 0; 7783 const PetscInt *cols; 7784 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7785 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7786 nnz[brow / bs] = jj / bs; 7787 if (jj % bs) ok = 0; 7788 if (cols) j0 = cols[0]; 7789 else j0 = -1; 7790 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7791 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7792 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7793 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7794 if (jj % bs) ok = 0; 7795 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7796 if (nnz[brow / bs] != jj / bs) ok = 0; 7797 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7798 } 7799 if (!ok) { 7800 PetscCall(PetscFree2(d_nnz, o_nnz)); 7801 goto old_bs; 7802 } 7803 } 7804 PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax); 7805 } 7806 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7807 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7808 PetscCall(PetscFree2(d_nnz, o_nnz)); 7809 // diag 7810 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7811 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7812 ai = aseq->i; 7813 n = ai[brow + 1] - ai[brow]; 7814 aj = aseq->j + ai[brow]; 7815 for (int k = 0; k < n; k += bs) { // block columns 7816 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7817 val = 0; 7818 for (int ii = 0; ii < bs; ii++) { // rows in block 7819 aa = aseq->a + ai[brow + ii] + k; 7820 for (int jj = 0; jj < bs; jj++) { // columns in block 7821 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7822 } 7823 } 7824 AA[k / bs] = val; 7825 } 7826 grow = Istart / bs + brow / bs; 7827 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7828 } 7829 // off-diag 7830 if (ismpiaij) { 7831 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7832 
const PetscScalar *vals; 7833 const PetscInt *cols, *garray = aij->garray; 7834 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7835 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7836 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7837 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7838 AA[k / bs] = 0; 7839 AJ[cidx] = garray[cols[k]] / bs; 7840 } 7841 nc = ncols / bs; 7842 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7843 for (int ii = 0; ii < bs; ii++) { // rows in block 7844 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7845 for (int k = 0; k < ncols; k += bs) { 7846 for (int jj = 0; jj < bs; jj++) { // cols in block 7847 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7848 } 7849 } 7850 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7851 } 7852 grow = Istart / bs + brow / bs; 7853 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7854 } 7855 } 7856 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7857 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7858 } else { 7859 const PetscScalar *vals; 7860 const PetscInt *idx; 7861 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7862 old_bs: 7863 /* 7864 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7865 */ 7866 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7867 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7868 if (isseqaij) { 7869 PetscInt max_d_nnz; 7870 /* 7871 Determine exact preallocation count for (sequential) scalar matrix 7872 */ 7873 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7874 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7875 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7876 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7877 PetscCall(PetscFree3(w0, w1, w2)); 7878 } else if (ismpiaij) { 7879 Mat Daij, Oaij; 7880 const PetscInt *garray; 7881 PetscInt max_d_nnz; 7882 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7883 /* 7884 Determine exact preallocation count for diagonal block portion of scalar matrix 7885 */ 7886 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7887 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7888 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7889 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7890 PetscCall(PetscFree3(w0, w1, w2)); 7891 /* 7892 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7893 */ 7894 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7895 o_nnz[jj] = 0; 7896 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7897 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7898 o_nnz[jj] += ncols; 7899 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7900 } 7901 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7902 } 7903 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7904 /* get scalar copy (norms) of matrix */ 7905 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7906 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7907 PetscCall(PetscFree2(d_nnz, o_nnz)); 7908 for (Ii = Istart; Ii < Iend; Ii++) { 7909 
PetscInt dest_row = Ii / bs; 7910 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7911 for (jj = 0; jj < ncols; jj++) { 7912 PetscInt dest_col = idx[jj] / bs; 7913 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7914 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7915 } 7916 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7917 } 7918 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7919 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7920 } 7921 } else { 7922 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7923 else { 7924 Gmat = Amat; 7925 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7926 } 7927 if (isseqaij) { 7928 a = Gmat; 7929 b = NULL; 7930 } else { 7931 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7932 a = d->A; 7933 b = d->B; 7934 } 7935 if (filter >= 0 || scale) { 7936 /* take absolute value of each entry */ 7937 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7938 MatInfo info; 7939 PetscScalar *avals; 7940 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7941 PetscCall(MatSeqAIJGetArray(c, &avals)); 7942 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7943 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7944 } 7945 } 7946 } 7947 if (symmetrize) { 7948 PetscBool isset, issym; 7949 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7950 if (!isset || !issym) { 7951 Mat matTrans; 7952 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7953 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7954 PetscCall(MatDestroy(&matTrans)); 7955 } 7956 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7957 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7958 if (scale) { 7959 /* scale c for all diagonal values = 1 or -1 */ 7960 Vec diag; 7961 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7962 PetscCall(MatGetDiagonal(Gmat, diag)); 7963 PetscCall(VecReciprocal(diag)); 7964 PetscCall(VecSqrtAbs(diag)); 7965 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7966 PetscCall(VecDestroy(&diag)); 7967 } 7968 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7969 7970 if (filter >= 0) { 7971 Mat Fmat = NULL; /* some silly compiler needs this */ 7972 7973 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 7974 PetscCall(MatDestroy(&Gmat)); 7975 Gmat = Fmat; 7976 } 7977 *a_Gmat = Gmat; 7978 PetscFunctionReturn(0); 7979 } 7980 7981 /* 7982 Special version for direct calls from Fortran 7983 */ 7984 #include <petsc/private/fortranimpl.h> 7985 7986 /* Change these macros so can be used in void function */ 7987 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7988 #undef PetscCall 7989 #define PetscCall(...) \ 7990 do { \ 7991 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7992 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7993 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 7994 return; \ 7995 } \ 7996 } while (0) 7997 7998 #undef SETERRQ 7999 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran compiler's name-mangling convention (all-caps,
   trailing underscore, or unchanged) so the Fortran linker can resolve it. */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix.

  All scalar arguments arrive by reference (Fortran convention) and are dereferenced up
  front. This is a void function: errors are reported through the trailing *_ierr argument
  via the PetscCall()/SETERRQ() macros redefined immediately above, which assign *_ierr
  and return instead of returning an error code.

  Input Parameters:
+ mmat  - the MPIAIJ matrix
. mm/im - number of rows and their global indices (negative rows are skipped)
. mn/in - number of columns and their global indices (negative columns are skipped)
. v     - the values, in row- or column-major order depending on aij->roworiented
- maddv - INSERT_VALUES or ADD_VALUES (may not be mixed between calls; checked below)

  Output Parameter:
. _ierr - error code, 0 on success
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro: MatSetValues_SeqAIJ_A_Private() and
     * MatSetValues_SeqAIJ_B_Private() operate directly on these ambient locals
     * (row pointers rp1/rp2, value pointers ap1/ap2, search bounds low/high, etc.),
     * so their names and types must match what the macros expect. */
    Mat         A     = aij->A; /* diagonal (local-column) block */
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B; /* off-diagonal block */
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch variables consumed by the two MatSetValues_SeqAIJ_*_Private() macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned by this process: set up the per-row state for both blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column falls in this process's diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Off-diagonal column: translate the global column index to the
             * local off-diagonal (B) column numbering via the colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* Column not present in the assembled pattern and new nonzeros are
                 * allowed: fall back to the pre-assembly representation */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another process: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ