1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 15 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 16 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 17 PetscCall(MatDestroy(&B)); 18 PetscFunctionReturn(PETSC_SUCCESS); 19 } 20 21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 22 { 23 Mat B; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 27 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 28 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 29 PetscFunctionReturn(PETSC_SUCCESS); 30 } 31 32 /*MC 33 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 34 35 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 36 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 37 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 38 for communicators controlling multiple processes. It is recommended that you call both of 39 the above preallocation routines for simplicity. 40 41 Options Database Keys: 42 . 
-mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

   Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Bind (or unbind) the matrix to the CPU: forwarded to the diagonal (a->A) and
   off-diagonal (a->B) blocks and to the work vectors used in matrix-vector products */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set row/column block sizes; the off-diagonal block B always has column block size 1
   because its (compressed) columns have no block structure */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS of the locally owned rows that contain at least one nonzero VALUE
   (stored zeros do not count). Collective; *keptrows is left NULL when no process
   has an all-zero row. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of local rows whose stored entries are all zero (or empty) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero in the diagonal block: not counted */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) { /* no zero rows anywhere: leave *keptrows NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: record the global index of every row with a nonzero value */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the diagonal of Y from vector D: fast path through the diagonal block when the
   matrix is assembled and the row/column layouts are congruent, otherwise the generic
   MatSetValues()-based default */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS of the global indices of locally owned rows with a zero (or missing)
   diagonal entry; the search is delegated to the sequential diagonal block */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* local -> global row indices */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute per-column reductions (1/2/inf norms, sums or means of real/imaginary parts)
   over the whole parallel matrix; `reductions` must hold N (global columns) reals on
   every process. Collective: results are combined with an Allreduce. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore pairs only force a device->host sync of the values before a->a/b->a are read below */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS (global indices) of the locally owned rows that have an entry outside the
   block diagonal: entries found by the sequential search on the diagonal block plus all
   rows with a nonzero in the off-diagonal block */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* merge the two (local-index) lists, sort, and drop duplicates */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local -> global row indices */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order N integer array but is fast to access.)
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-map version: key = global column + 1, value = local column + 1 (0 means "absent") */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array version: colmap[global column] = local column + 1 (0 means "absent") */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add one value into the diagonal block. Relies on locals of the caller
   (rp1/ap1/low1/high1/lastcol1/nrow1/rmax1/_i/t/N and the Mat_SeqAIJ fields of `a`);
   binary-search then linear scan for the column, reallocating the row when a new
   nonzero must be inserted and `nonew` permits it. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block
   (rp2/ap2/low2/high2/lastcol2/nrow2/rmax2 and Mat_SeqAIJ `b`) */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

/* Overwrite the values of one locally owned global row. `v` is ordered by global
   column: off-diagonal entries left of the diagonal block, then the diagonal block,
   then off-diagonal entries to the right. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* l = number of B entries with global column < rstart */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetValues() for MPIAIJ: locally owned rows are inserted directly into the
   diagonal/off-diagonal blocks via the macros above; off-process rows are stashed
   for communication during assembly */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write position in aj, nonzeros of the current row in A */
  PetscInt    offd_so_far = 0, onz; /* running write position in bj, nonzeros of the current row in B */

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column ids */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* off-diagonal block keeps global ids until assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetValues() for MPIAIJ: only locally owned rows are supported; locally owned
   columns are read from the diagonal block, the rest from the off-diagonal block
   via the global-to-local column map. Absent entries return 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; /* column not present in B */
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start assembly: initiate communication of all stashed off-process entries
   (skipped entirely when stashing was disabled) */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: drain the stash of off-process entries received from other ranks,
   assemble the two sequential blocks, handle collective disassembly detection, set up
   the matrix-vector multiply machinery on first final assembly, and reduce the global
   nonzero state */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag)); /* cached diagonal is stale after assembly */

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all stored values (the nonzero pattern is kept) in both blocks */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given global rows (each rank may pass any rows; they are mapped to their
   owners), optionally placing `diag` on the diagonal and fixing the right-hand side
   b = diag * x for the zeroed rows */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the nonzero states so we can detect pattern changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal position for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA; /* restore the original insertion policy */
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given global rows AND the matching columns; see MatZeroRowsColumns().
   Rows are routed to their owners with a PetscSF; the column part is handled by
   scattering a 0/1 mask to the off-diagonal block. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 865 } 866 rrows[r].rank = p; 867 rrows[r].index = rows[r] - owners[p]; 868 } 869 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 870 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 871 /* Collect flags for rows to be zeroed */ 872 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 873 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 874 PetscCall(PetscSFDestroy(&sf)); 875 /* Compress and put in row numbers */ 876 for (r = 0; r < n; ++r) 877 if (lrows[r] >= 0) lrows[len++] = r; 878 /* zero diagonal part of matrix */ 879 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 880 /* handle off diagonal part of matrix */ 881 PetscCall(MatCreateVecs(A, &xmask, NULL)); 882 PetscCall(VecDuplicate(l->lvec, &lmask)); 883 PetscCall(VecGetArray(xmask, &bb)); 884 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 885 PetscCall(VecRestoreArray(xmask, &bb)); 886 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 887 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 888 PetscCall(VecDestroy(&xmask)); 889 if (x && b) { /* this code is buggy when the row and column layout don't match */ 890 PetscBool cong; 891 892 PetscCall(MatHasCongruentLayouts(A, &cong)); 893 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 894 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 895 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 896 PetscCall(VecGetArrayRead(l->lvec, &xx)); 897 PetscCall(VecGetArray(b, &bb)); 898 } 899 PetscCall(VecGetArray(lmask, &mask)); 900 /* remove zeroed rows of off diagonal matrix */ 901 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 902 ii = aij->i; 903 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - 
ii[lrows[i]])); 904 /* loop over all elements of off process part of matrix zeroing removed columns*/ 905 if (aij->compressedrow.use) { 906 m = aij->compressedrow.nrows; 907 ii = aij->compressedrow.i; 908 ridx = aij->compressedrow.rindex; 909 for (i = 0; i < m; i++) { 910 n = ii[i + 1] - ii[i]; 911 aj = aij->j + ii[i]; 912 aa = aij_a + ii[i]; 913 914 for (j = 0; j < n; j++) { 915 if (PetscAbsScalar(mask[*aj])) { 916 if (b) bb[*ridx] -= *aa * xx[*aj]; 917 *aa = 0.0; 918 } 919 aa++; 920 aj++; 921 } 922 ridx++; 923 } 924 } else { /* do not use compressed row format */ 925 m = l->B->rmap->n; 926 for (i = 0; i < m; i++) { 927 n = ii[i + 1] - ii[i]; 928 aj = aij->j + ii[i]; 929 aa = aij_a + ii[i]; 930 for (j = 0; j < n; j++) { 931 if (PetscAbsScalar(mask[*aj])) { 932 if (b) bb[i] -= *aa * xx[*aj]; 933 *aa = 0.0; 934 } 935 aa++; 936 aj++; 937 } 938 } 939 } 940 if (x && b) { 941 PetscCall(VecRestoreArray(b, &bb)); 942 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 943 } 944 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 945 PetscCall(VecRestoreArray(lmask, &mask)); 946 PetscCall(VecDestroy(&lmask)); 947 PetscCall(PetscFree(lrows)); 948 949 /* only change matrix nonzero state if pattern was allowed to be changed */ 950 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 951 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 952 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 953 } 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 958 { 959 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 960 PetscInt nt; 961 VecScatter Mvctx = a->Mvctx; 962 963 PetscFunctionBegin; 964 PetscCall(VecGetLocalSize(xx, &nt)); 965 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 966 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 
967 PetscUseTypeMethod(a->A, mult, xx, yy); 968 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 969 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 970 PetscFunctionReturn(PETSC_SUCCESS); 971 } 972 973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 976 977 PetscFunctionBegin; 978 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 979 PetscFunctionReturn(PETSC_SUCCESS); 980 } 981 982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 985 VecScatter Mvctx = a->Mvctx; 986 987 PetscFunctionBegin; 988 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 989 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 990 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 998 999 PetscFunctionBegin; 1000 /* do nondiagonal part */ 1001 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1002 /* do local part */ 1003 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1004 /* add partial results together */ 1005 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1006 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1011 { 1012 MPI_Comm comm; 1013 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij; 1014 Mat Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs; 1015 IS Me, Notme; 1016 PetscInt M, N, first, last, *notme, i; 1017 PetscBool lf; 1018 PetscMPIInt size; 1019 1020 PetscFunctionBegin; 1021 /* Easy test: symmetric diagonal block */ 1022 Bij = 
(Mat_MPIAIJ *)Bmat->data; 1023 Bdia = Bij->A; 1024 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1025 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1026 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1027 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1028 PetscCallMPI(MPI_Comm_size(comm, &size)); 1029 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1030 1031 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1032 PetscCall(MatGetSize(Amat, &M, &N)); 1033 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1034 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1035 for (i = 0; i < first; i++) notme[i] = i; 1036 for (i = last; i < M; i++) notme[i - last + first] = i; 1037 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1038 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1039 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1040 Aoff = Aoffs[0]; 1041 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1042 Boff = Boffs[0]; 1043 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1044 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1045 PetscCall(MatDestroyMatrices(1, &Boffs)); 1046 PetscCall(ISDestroy(&Me)); 1047 PetscCall(ISDestroy(&Notme)); 1048 PetscCall(PetscFree(notme)); 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1053 { 1054 PetscFunctionBegin; 1055 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1056 PetscFunctionReturn(PETSC_SUCCESS); 1057 } 1058 1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1062 1063 PetscFunctionBegin; 1064 /* do nondiagonal part */ 1065 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1066 /* do local part */ 1067 PetscCall((*a->A->ops->multtransposeadd)(a->A, 
xx, yy, zz)); 1068 /* add partial results together */ 1069 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1070 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1071 PetscFunctionReturn(PETSC_SUCCESS); 1072 } 1073 1074 /* 1075 This only works correctly for square matrices where the subblock A->A is the 1076 diagonal block 1077 */ 1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1079 { 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1081 1082 PetscFunctionBegin; 1083 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1084 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1085 PetscCall(MatGetDiagonal(a->A, v)); 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1092 1093 PetscFunctionBegin; 1094 PetscCall(MatScale(a->A, aa)); 1095 PetscCall(MatScale(a->B, aa)); 1096 PetscFunctionReturn(PETSC_SUCCESS); 1097 } 1098 1099 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1100 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1101 { 1102 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1103 1104 PetscFunctionBegin; 1105 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1106 PetscCall(PetscFree(aij->Aperm1)); 1107 PetscCall(PetscFree(aij->Bperm1)); 1108 PetscCall(PetscFree(aij->Ajmap1)); 1109 PetscCall(PetscFree(aij->Bjmap1)); 1110 1111 PetscCall(PetscFree(aij->Aimap2)); 1112 PetscCall(PetscFree(aij->Bimap2)); 1113 PetscCall(PetscFree(aij->Aperm2)); 1114 PetscCall(PetscFree(aij->Bperm2)); 1115 PetscCall(PetscFree(aij->Ajmap2)); 1116 PetscCall(PetscFree(aij->Bjmap2)); 1117 1118 PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf)); 1119 
PetscCall(PetscFree(aij->Cperm1)); 1120 PetscFunctionReturn(PETSC_SUCCESS); 1121 } 1122 1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1124 { 1125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1126 1127 PetscFunctionBegin; 1128 #if defined(PETSC_USE_LOG) 1129 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 1130 #endif 1131 PetscCall(MatStashDestroy_Private(&mat->stash)); 1132 PetscCall(VecDestroy(&aij->diag)); 1133 PetscCall(MatDestroy(&aij->A)); 1134 PetscCall(MatDestroy(&aij->B)); 1135 #if defined(PETSC_USE_CTABLE) 1136 PetscCall(PetscHMapIDestroy(&aij->colmap)); 1137 #else 1138 PetscCall(PetscFree(aij->colmap)); 1139 #endif 1140 PetscCall(PetscFree(aij->garray)); 1141 PetscCall(VecDestroy(&aij->lvec)); 1142 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1143 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 1144 PetscCall(PetscFree(aij->ld)); 1145 1146 /* Free COO */ 1147 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1148 1149 PetscCall(PetscFree(mat->data)); 1150 1151 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1152 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 1153 1154 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1155 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 1156 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 1157 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 1158 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 1159 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 1160 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 1161 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 1162 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpibaij_C", NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 1164 #if defined(PETSC_HAVE_CUDA) 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 1166 #endif 1167 #if defined(PETSC_HAVE_HIP) 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 1169 #endif 1170 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 1172 #endif 1173 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 1174 #if defined(PETSC_HAVE_ELEMENTAL) 1175 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 1176 #endif 1177 #if defined(PETSC_HAVE_SCALAPACK) 1178 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 1179 #endif 1180 #if defined(PETSC_HAVE_HYPRE) 1181 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 1182 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 1183 #endif 1184 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 1185 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 1188 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 1189 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 1190 #if defined(PETSC_HAVE_MKL_SPARSE) 1191 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 1192 #endif 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 1195 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 1196 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 1197 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 1198 PetscFunctionReturn(PETSC_SUCCESS); 1199 } 1200 1201 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1202 { 1203 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1204 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1205 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1206 const PetscInt *garray = aij->garray; 1207 const PetscScalar *aa, *ba; 1208 PetscInt header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb; 1209 PetscInt *rowlens; 1210 PetscInt *colidxs; 1211 PetscScalar *matvals; 1212 1213 PetscFunctionBegin; 1214 PetscCall(PetscViewerSetUp(viewer)); 1215 1216 M = mat->rmap->N; 1217 N = mat->cmap->N; 1218 m = mat->rmap->n; 1219 rs = mat->rmap->rstart; 1220 cs = mat->cmap->rstart; 1221 nz = A->nz + B->nz; 1222 1223 /* write matrix header */ 1224 header[0] = MAT_FILE_CLASSID; 1225 header[1] = M; 1226 header[2] = N; 1227 header[3] = nz; 1228 PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1229 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1230 1231 /* fill in and store row lengths */ 1232 PetscCall(PetscMalloc1(m, &rowlens)); 1233 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1234 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1235 PetscCall(PetscFree(rowlens)); 1236 1237 /* fill in and store column indices */ 1238 PetscCall(PetscMalloc1(nz, 
&colidxs)); 1239 for (cnt = 0, i = 0; i < m; i++) { 1240 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1241 if (garray[B->j[jb]] > cs) break; 1242 colidxs[cnt++] = garray[B->j[jb]]; 1243 } 1244 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1245 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1246 } 1247 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1248 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1249 PetscCall(PetscFree(colidxs)); 1250 1251 /* fill in and store nonzero values */ 1252 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1253 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1254 PetscCall(PetscMalloc1(nz, &matvals)); 1255 for (cnt = 0, i = 0; i < m; i++) { 1256 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1257 if (garray[B->j[jb]] > cs) break; 1258 matvals[cnt++] = ba[jb]; 1259 } 1260 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1261 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1262 } 1263 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1264 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1265 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1266 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1267 PetscCall(PetscFree(matvals)); 1268 1269 /* write block size option to the viewer's .info file */ 1270 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 1274 #include <petscdraw.h> 1275 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1276 { 1277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1278 PetscMPIInt rank = aij->rank, size = aij->size; 1279 PetscBool isdraw, iascii, isbinary; 1280 PetscViewer sviewer; 1281 
PetscViewerFormat format; 1282 1283 PetscFunctionBegin; 1284 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1285 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1286 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1287 if (iascii) { 1288 PetscCall(PetscViewerGetFormat(viewer, &format)); 1289 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1290 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1291 PetscCall(PetscMalloc1(size, &nz)); 1292 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1293 for (i = 0; i < (PetscInt)size; i++) { 1294 nmax = PetscMax(nmax, nz[i]); 1295 nmin = PetscMin(nmin, nz[i]); 1296 navg += nz[i]; 1297 } 1298 PetscCall(PetscFree(nz)); 1299 navg = navg / size; 1300 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1301 PetscFunctionReturn(PETSC_SUCCESS); 1302 } 1303 PetscCall(PetscViewerGetFormat(viewer, &format)); 1304 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1305 MatInfo info; 1306 PetscInt *inodes = NULL; 1307 1308 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1309 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1310 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1311 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1312 if (!inodes) { 1313 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1314 (double)info.memory)); 1315 } else { 1316 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz 
alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1317 (double)info.memory)); 1318 } 1319 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1320 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1321 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1322 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1323 PetscCall(PetscViewerFlush(viewer)); 1324 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1325 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1326 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1327 PetscFunctionReturn(PETSC_SUCCESS); 1328 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1329 PetscInt inodecount, inodelimit, *inodes; 1330 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1331 if (inodes) { 1332 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1333 } else { 1334 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1335 } 1336 PetscFunctionReturn(PETSC_SUCCESS); 1337 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1338 PetscFunctionReturn(PETSC_SUCCESS); 1339 } 1340 } else if (isbinary) { 1341 if (size == 1) { 1342 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1343 PetscCall(MatView(aij->A, viewer)); 1344 } else { 1345 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1346 } 1347 PetscFunctionReturn(PETSC_SUCCESS); 1348 } else if (iascii && size == 1) { 1349 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1350 PetscCall(MatView(aij->A, viewer)); 1351 PetscFunctionReturn(PETSC_SUCCESS); 1352 
} else if (isdraw) { 1353 PetscDraw draw; 1354 PetscBool isnull; 1355 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1356 PetscCall(PetscDrawIsNull(draw, &isnull)); 1357 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1358 } 1359 1360 { /* assemble the entire matrix onto first processor */ 1361 Mat A = NULL, Av; 1362 IS isrow, iscol; 1363 1364 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1365 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1366 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1367 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1368 /* The commented code uses MatCreateSubMatrices instead */ 1369 /* 1370 Mat *AA, A = NULL, Av; 1371 IS isrow,iscol; 1372 1373 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1374 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1375 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1376 if (rank == 0) { 1377 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1378 A = AA[0]; 1379 Av = AA[0]; 1380 } 1381 PetscCall(MatDestroySubMatrices(1,&AA)); 1382 */ 1383 PetscCall(ISDestroy(&iscol)); 1384 PetscCall(ISDestroy(&isrow)); 1385 /* 1386 Everyone has to call to draw the matrix since the graphics waits are 1387 synchronized across all processors that share the PetscDraw object 1388 */ 1389 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1390 if (rank == 0) { 1391 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1392 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1393 } 1394 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1395 PetscCall(PetscViewerFlush(viewer)); 1396 PetscCall(MatDestroy(&A)); 1397 } 1398 PetscFunctionReturn(PETSC_SUCCESS); 1399 } 1400 1401 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1402 { 1403 PetscBool iascii, isdraw, issocket, isbinary; 1404 1405 PetscFunctionBegin; 1406 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1407 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1408 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1409 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1410 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1411 PetscFunctionReturn(PETSC_SUCCESS); 1412 } 1413 1414 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1415 { 1416 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1417 Vec bb1 = NULL; 1418 PetscBool hasop; 1419 1420 PetscFunctionBegin; 1421 if (flag == SOR_APPLY_UPPER) { 1422 
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1423 PetscFunctionReturn(PETSC_SUCCESS); 1424 } 1425 1426 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1427 1428 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1429 if (flag & SOR_ZERO_INITIAL_GUESS) { 1430 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1431 its--; 1432 } 1433 1434 while (its--) { 1435 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1436 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 1438 /* update rhs: bb1 = bb - B*x */ 1439 PetscCall(VecScale(mat->lvec, -1.0)); 1440 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1441 1442 /* local sweep */ 1443 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1444 } 1445 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1446 if (flag & SOR_ZERO_INITIAL_GUESS) { 1447 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1448 its--; 1449 } 1450 while (its--) { 1451 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1452 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 1454 /* update rhs: bb1 = bb - B*x */ 1455 PetscCall(VecScale(mat->lvec, -1.0)); 1456 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1457 1458 /* local sweep */ 1459 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1460 } 1461 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1462 if (flag & SOR_ZERO_INITIAL_GUESS) { 1463 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1464 its--; 1465 } 1466 while (its--) { 1467 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1468 PetscCall(VecScatterEnd(mat->Mvctx, 
xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    /* gather the off-process entries of xx needed to apply the off-diagonal block B */
    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) { /* lazily build and cache the global diagonal */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build a permuted copy of A from the row permutation rowp and column permutation colp.
   The permutations are inverted with PetscSF reductions so each process learns the new
   global destination of its rows (rdest), columns (cdest), and ghost columns (gcdest),
   then the result is assembled with MatSetValues(). */
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros of the permuted matrix for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in the code visible here, so this destroy looks
     unreachable — confirm against the full file before removing */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the number of ghost (off-process) columns of the off-diagonal block and,
   optionally, their global column indices (garray) */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics; local numbers are the sum of the diagonal (A) and
   off-diagonal (B) block statistics, global numbers are reduced over the communicator */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward options to the diagonal (A) and off-diagonal (B) blocks where relevant;
   some options are recorded on the parallel matrix itself or intentionally ignored */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return one locally owned row, merging the diagonal (A) and off-diagonal (B) parts
   into a single list sorted by increasing global column number; the output arrays are
   cached buffers owned by the matrix and must be released with MatRestoreRow() */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request columns/values from A and B only when the caller asked for them */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column lies before the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the row obtained with MatGetRow_MPIAIJ(); only clears the active flag since
   the returned arrays are matrix-owned caches */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute Frobenius, 1- (max column) or infinity (max row) norm of the parallel matrix */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]]
        += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose the parallel matrix: the diagonal block is transposed locally in place,
   while the off-diagonal block's entries are inserted with MatSetValues() (their
   destinations may be off-process) */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* a row of B becomes a column of the transpose: insert ncol rows x 1 column */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale the matrix as diag(ll) * mat * diag(rr); either vector may be NULL */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
 */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clear the factored state; only the diagonal block carries factorization state here */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compare A and B by comparing their diagonal and off-diagonal blocks; the result is
   reduced with a logical AND so all ranks agree */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B, block-wise when the nonzero patterns and copy implementations match,
   otherwise falling back to the generic entry-by-entry copy */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Default setup: preallocate with default sizes when the user never called a
   preallocation routine */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: merge-count the (sorted) global
     column lists of row i of X and Y, counting shared columns once */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                  /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*X; for differing nonzero structure a new matrix with the merged pattern is
   preallocated and header-merged back into Y */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2138 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2139 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2140 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2141 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2142 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2143 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2144 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2145 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2146 PetscCall(MatHeaderMerge(Y, &B)); 2147 PetscCall(PetscFree(nnz_d)); 2148 PetscCall(PetscFree(nnz_o)); 2149 } 2150 PetscFunctionReturn(PETSC_SUCCESS); 2151 } 2152 2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2154 2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2156 { 2157 PetscFunctionBegin; 2158 if (PetscDefined(USE_COMPLEX)) { 2159 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2160 2161 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2162 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2163 } 2164 PetscFunctionReturn(PETSC_SUCCESS); 2165 } 2166 2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2170 2171 PetscFunctionBegin; 2172 PetscCall(MatRealPart(a->A)); 2173 PetscCall(MatRealPart(a->B)); 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2178 { 2179 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2180 2181 PetscFunctionBegin; 2182 PetscCall(MatImaginaryPart(a->A)); 2183 PetscCall(MatImaginaryPart(a->B)); 2184 PetscFunctionReturn(PETSC_SUCCESS); 2185 } 2186 2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2188 { 2189 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2190 PetscInt i, *idxb = NULL, m = A->rmap->n; 2191 PetscScalar *va, *vv; 2192 Vec vB, vA; 2193 const PetscScalar *vb; 2194 2195 PetscFunctionBegin; 2196 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2197 
PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2198 2199 PetscCall(VecGetArrayWrite(vA, &va)); 2200 if (idx) { 2201 for (i = 0; i < m; i++) { 2202 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2203 } 2204 } 2205 2206 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2207 PetscCall(PetscMalloc1(m, &idxb)); 2208 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2209 2210 PetscCall(VecGetArrayWrite(v, &vv)); 2211 PetscCall(VecGetArrayRead(vB, &vb)); 2212 for (i = 0; i < m; i++) { 2213 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2214 vv[i] = vb[i]; 2215 if (idx) idx[i] = a->garray[idxb[i]]; 2216 } else { 2217 vv[i] = va[i]; 2218 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2219 } 2220 } 2221 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2222 PetscCall(VecRestoreArrayWrite(vA, &va)); 2223 PetscCall(VecRestoreArrayRead(vB, &vb)); 2224 PetscCall(PetscFree(idxb)); 2225 PetscCall(VecDestroy(&vA)); 2226 PetscCall(VecDestroy(&vB)); 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2231 { 2232 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2233 PetscInt m = A->rmap->n, n = A->cmap->n; 2234 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2235 PetscInt *cmap = mat->garray; 2236 PetscInt *diagIdx, *offdiagIdx; 2237 Vec diagV, offdiagV; 2238 PetscScalar *a, *diagA, *offdiagA; 2239 const PetscScalar *ba, *bav; 2240 PetscInt r, j, col, ncols, *bi, *bj; 2241 Mat B = mat->B; 2242 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2243 2244 PetscFunctionBegin; 2245 /* When a process holds entire A and other processes have no entry */ 2246 if (A->cmap->N == n) { 2247 PetscCall(VecGetArrayWrite(v, &diagA)); 2248 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2250 PetscCall(VecDestroy(&diagV)); 2251 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2252 
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: every row's min-abs is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so there is at least one implicit 0.0: the off-diagonal min-abs starts at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results; on a tie the smaller
     global column index wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* For each local row, return the minimum entry (by real part) and, optionally, its
   global column index; implicit zeros in the compressed off-diagonal block count */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so there is at least one implicit 0.0: the off-diagonal minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const
PetscScalar **)&diagA));
  /* Combine: a row's global minimum is the smaller (by real part) of its diagonal-block
     minimum and its off-diagonal minimum; ties pick the smaller global column index */
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise maximum (compared by real part) of the locally owned rows of a MATMPIAIJ matrix.
   Implicit zeros of the sparse off-diagonal block participate in the maximum; idx[] (optional)
   receives the global column index of each row's maximum. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps off-diagonal (B) columns to global columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; defer to the SeqAIJ implementation */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no locally owned columns: rows have no stored entries here */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this search compares global column numbers against the loop index j
         (shifted by n once past cstart); it presumes cmap[] is sorted ascending — confirm
         against how garray is constructed */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan stored off-diagonal entries of this row for a larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine diagonal-block and off-diagonal maxima; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns a sequential matrix holding the nonzero structure of the whole parallel matrix
   (values are not requested: MAT_DO_NOT_GET_VALUES) */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy; /* take ownership of the single matrix; free only the array wrapper */
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inverts the (point-)block diagonal of the diagonal block; propagates any factorization error */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fills the matrix with random values; for a preallocated-but-unassembled matrix the
   off-diagonal block must skip the locally owned column range (those belong in a->A) */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  /* complete communication so the randomized matrix is immediately usable */
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation behind MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the overlap routine
   installed in the function table */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* i[n] of a SeqAIJ is its total stored-nonzero count; sum diagonal (A) and off-diagonal (B) blocks */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: `MATMPIAIJ`, `MatIncreaseOverlap()`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch through the composed method so non-MPIAIJ types are silently ignored */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Processes the MPIAIJ-specific options database entries */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* reflect the currently installed overlap routine as the option's default */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I; ensures at least a one-entry-per-row preallocation exists before shifting */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the new-nonzero policy across re-preallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether any locally owned row lacks a stored diagonal entry; *d (optional) is the
   global row number of the first such row */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert the diagonal block's local row to a global row number */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inverts the variable-size block diagonal, which lies entirely in the diagonal block a->A */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatEliminateZeros_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* drop explicitly stored zeros from both the diagonal and off-diagonal blocks */
  PetscCall(MatEliminateZeros(a->A));
  PetscCall(MatEliminateZeros(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; the slot indices (see the /*NN*/ markers) must match the
   ordering of struct _MatOps in the private Mat header */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

/* Stashes a copy of the numerical values of both blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the numerical values previously saved by MatStoreValues_MPIAIJ() in both blocks */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) SeqAIJ blocks;
   discards any previous column map, ghost vector, and scatter since the sparsity changes */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on one process there is no off-diagonal part, hence zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Resets both blocks to their preallocated (empty) state, dropping the communication
   structures that depend on the assembled sparsity pattern */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates a MATMPIAIJ matrix (layouts, blocks, column map, ghost data), copying values
   according to cpvalues */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-call MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices = NULL;
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* deep-copy the global-to-local column map (hash table or dense array build) */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 keeps the allocation nonempty when len == 0 */
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a MATMPIAIJ matrix from a viewer; dispatches on the viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer,
PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a MATMPIAIJ matrix from the PETSc binary format: a 4-int header (classid, M, N, nz),
   per-row lengths, then column indices and values; each rank reads only its local rows */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M = header[1];
  N = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format that this reader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; /* prefix sum: lengths -> CSR row offsets */
  /* cross-check the total nonzero count against the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns.
*/
/* Gathers the parallel column IS into a sequential IS on each process; when every process
   selects exactly its own column range, an identity stride IS is built instead to skip the
   all-gather */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS iscol_local;
  PetscBool isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* MPI_MIN: the optimization applies only if EVERY process grabs exactly its own columns */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local columns of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameter:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
  garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec x, cmap;
  const PetscInt *is_idx;
  PetscScalar *xarray, *cmaparray;
  PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data;
  Mat B = a->B;
  Vec lvec = a->lvec, lcmap;
  PetscInt i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm comm;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* exclusive prefix sum: isstart = number of selected columns on lower ranks */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; /* mark selected columns (>= 0) */
    cmaparray[is_idx[i] - cstart] = i + isstart;         /* global index of iscol[i] */
    idx[i] = is_idx[i] - cstart;                         /* local index of iscol[i] */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* this ghost column was selected by iscol */
      idx[count] = i;                                       /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat M = NULL;
  MPI_Comm comm;
  IS iscol_d, isrow_d, iscol_o;
  Mat Asub = NULL, Bsub = NULL;
  PetscInt n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3289 } else { /* call == MAT_INITIAL_MATRIX) */ 3290 const PetscInt *garray; 3291 PetscInt BsubN; 3292 3293 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3294 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3295 3296 /* Create local submatrices Asub and Bsub */ 3297 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3299 3300 /* Create submatrix M */ 3301 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3302 3303 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3304 asub = (Mat_MPIAIJ *)M->data; 3305 3306 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3307 n = asub->B->cmap->N; 3308 if (BsubN > n) { 3309 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3310 const PetscInt *idx; 3311 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3312 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3313 3314 PetscCall(PetscMalloc1(n, &idx_new)); 3315 j = 0; 3316 PetscCall(ISGetIndices(iscol_o, &idx)); 3317 for (i = 0; i < n; i++) { 3318 if (j >= BsubN) break; 3319 while (subgarray[i] > garray[j]) j++; 3320 3321 if (subgarray[i] == garray[j]) { 3322 idx_new[i] = idx[j++]; 3323 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3324 } 3325 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3326 3327 PetscCall(ISDestroy(&iscol_o)); 3328 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3329 3330 } else if (BsubN < n) { 3331 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" 
PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request, so a later
       MAT_REUSE_MATRIX call can retrieve the same local index sets from the submatrix */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Front end for submatrix extraction on MATMPIAIJ. Dispatches to
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() / MatCreateSubMatrix_MPIAIJ_SameRowDist()
   when isrow (and possibly iscol) lie inside this process's ownership range on every
   rank (decided with a collective MPIU_Allreduce), otherwise falls back to the
   nonscalable path that gathers all of iscol onto each process.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* The composed index sets below were attached by a previous MAT_INITIAL_MATRIX call;
       their presence tells us which fast path (if any) was taken the first time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* All ranks must agree before taking a fast path; MPI_LAND makes the decision collective */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general (nonscalable) path below,
           reusing the iscol_local we already built */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* Attach the gathered column IS so a MAT_REUSE_MATRIX call can find it again */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of B columns

  Output Parameter:
. mat - the matrix, with input A as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Translate B's compact (local) column indices to global indices via garray;
     the assembly at the end of this routine rebuilds the compact form */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer ownership of the shared arrays from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  /*
     Extracts mat[isrow, iscol] when isrow has the same row distribution as mat.
     iscol_local is a sorted, sequential version of iscol (may contain duplicates);
     it may be NULL on MAT_REUSE_MATRIX, in which case the index sets composed on
     *newmat by the initial call ("SubIScol", "Subcmap", "SubMatrix") are reused.
  */
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: merge-scan against sorted garray */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
         rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* map Msub's columns to newmat's global columns */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on
 all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the n columns over the ranks as evenly as possible */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
         rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates B from a local CSR description (Ii,J,v) and inserts the values; also
   builds Aij->ld, the per-row count of entries left of the diagonal block */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* Validate each local row: nonnegative length, first column index nonnegative,
       last column index within the global column count (full coverage assumes the
       column indices within each row are sorted -- NOTE(review): confirm) */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* Count, per row, how many entries fall in the diagonal block [cstart,cend) vs off-diagonal */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* Insert the values row by row; v may be NULL (structure-only preallocation) */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* All entries are local by construction, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering,
 i.e for the following matrix, the input data expected is
  as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     silently does nothing if the matrix type did not register the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B     - the matrix
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc3. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation; no-op for types that did not register it */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4174 4175 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4176 4177 The format which is used for the sparse matrix input, is equivalent to a 4178 row-major ordering.. i.e for the following matrix, the input data expected is 4179 as shown 4180 4181 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4182 4183 $ 1 0 0 4184 $ 2 0 3 P0 4185 $ ------- 4186 $ 4 5 6 P1 4187 $ 4188 $ Process0 [P0]: rows_owned=[0,1] 4189 $ i = {0,1,3} [size = nrow+1 = 2+1] 4190 $ j = {0,0,2} [size = 3] 4191 $ v = {1,2,3} [size = 3] 4192 $ 4193 $ Process1 [P1]: rows_owned=[2] 4194 $ i = {0,3} [size = nrow+1 = 1+1] 4195 $ j = {0,1,2} [size = 3] 4196 $ v = {4,5,6} [size = 3] 4197 4198 .seealso: `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4199 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4200 @*/ 4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4202 { 4203 PetscFunctionBegin; 4204 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4205 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4206 PetscCall(MatCreate(comm, mat)); 4207 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4208 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4209 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4210 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4211 PetscFunctionReturn(PETSC_SUCCESS); 4212 } 4213 4214 /*@ 4215 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4216 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()` 4217 4218 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4219 4220 Collective 4221 4222 Input Parameters: 4223 + mat - the matrix 4224 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4225 . n - This value should be the same as the local size used in creating the 4226 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4227 calculated if N is given) For square matrices n is almost always m. 4228 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4229 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4230 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4231 . J - column indices 4232 - v - matrix values 4233 4234 Level: intermediate 4235 4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4237 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4238 @*/ 4239 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4240 { 4241 PetscInt nnz, i; 4242 PetscBool nooffprocentries; 4243 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4244 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4245 PetscScalar *ad, *ao; 4246 PetscInt ldi, Iii, md; 4247 const PetscInt *Adi = Ad->i; 4248 PetscInt *ld = Aij->ld; 4249 4250 PetscFunctionBegin; 4251 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4252 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4253 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4254 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4255 4256 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4257 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4258 4259 for (i = 0; i < m; i++) { 4260 nnz = Ii[i + 1] - Ii[i]; 4261 Iii = Ii[i]; 4262 ldi = ld[i]; 4263 md = Adi[i + 1] - Adi[i]; 4264 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4265 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4266 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4267 ad += md; 4268 ao += nnz - md; 4269 } 4270 nooffprocentries = mat->nooffprocentries; 4271 mat->nooffprocentries = PETSC_TRUE; 4272 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4273 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4274 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4275 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4276 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4277 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4278 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4279 mat->nooffprocentries = nooffprocentries; 4280 PetscFunctionReturn(PETSC_SUCCESS); 4281 } 4282 4283 /*@ 4284 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4285 4286 Collective 4287 4288 Input Parameters: 4289 + mat - the matrix 4290 - v - matrix values, stored by row 4291 4292 Level: intermediate 4293 4294 Note: 4295 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4296 4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4298 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4299 @*/ 
4300 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4301 { 4302 PetscInt nnz, i, m; 4303 PetscBool nooffprocentries; 4304 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4307 PetscScalar *ad, *ao; 4308 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4309 PetscInt ldi, Iii, md; 4310 PetscInt *ld = Aij->ld; 4311 4312 PetscFunctionBegin; 4313 m = mat->rmap->n; 4314 4315 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4316 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4317 Iii = 0; 4318 for (i = 0; i < m; i++) { 4319 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4320 ldi = ld[i]; 4321 md = Adi[i + 1] - Adi[i]; 4322 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4323 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4324 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4325 ad += md; 4326 ao += nnz - md; 4327 Iii += nnz; 4328 } 4329 nooffprocentries = mat->nooffprocentries; 4330 mat->nooffprocentries = PETSC_TRUE; 4331 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4332 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4333 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4334 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4335 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4336 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4337 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4338 mat->nooffprocentries = nooffprocentries; 4339 PetscFunctionReturn(PETSC_SUCCESS); 4340 } 4341 4342 /*@C 4343 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4344 (the default parallel PETSc format). For good matrix assembly performance 4345 the user should preallocate the matrix storage by setting the parameters 4346 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4347 performance can be increased by more than a factor of 50. 
4348 4349 Collective 4350 4351 Input Parameters: 4352 + comm - MPI communicator 4353 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4354 This value should be the same as the local size used in creating the 4355 y vector for the matrix-vector product y = Ax. 4356 . n - This value should be the same as the local size used in creating the 4357 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4358 calculated if N is given) For square matrices n is almost always m. 4359 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4360 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4361 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4362 (same value is used for all local rows) 4363 . d_nnz - array containing the number of nonzeros in the various rows of the 4364 DIAGONAL portion of the local submatrix (possibly different for each row) 4365 or NULL, if d_nz is used to specify the nonzero structure. 4366 The size of this array is equal to the number of local rows, i.e 'm'. 4367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4368 submatrix (same value is used for all local rows). 4369 - o_nnz - array containing the number of nonzeros in the various rows of the 4370 OFF-DIAGONAL portion of the local submatrix (possibly different for 4371 each row) or NULL, if o_nz is used to specify the nonzero 4372 structure. The size of this array is equal to the number 4373 of local rows, i.e 'm'. 4374 4375 Output Parameter: 4376 . A - the matrix 4377 4378 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4379 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  m,n,M,N parameters specify the size of the matrix, and its partitioning across
  processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2 etc.. where
  m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitute the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).
4418 4419 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4420 4421 When calling this routine with a single process communicator, a matrix of 4422 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4423 type of communicator, use the construction mechanism 4424 .vb 4425 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4426 .ve 4427 4428 $ MatCreate(...,&A); 4429 $ MatSetType(A,MATMPIAIJ); 4430 $ MatSetSizes(A, m,n,M,N); 4431 $ MatMPIAIJSetPreallocation(A,...); 4432 4433 By default, this format uses inodes (identical nodes) when possible. 4434 We search for consecutive rows with the same nonzero structure, thereby 4435 reusing matrix information to achieve increased efficiency. 4436 4437 Options Database Keys: 4438 + -mat_no_inode - Do not use inodes 4439 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4440 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4441 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4442 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4443 4444 Example usage: 4445 4446 Consider the following 8x8 matrix with 34 non-zero values, that is 4447 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4448 proc1 owns 3 rows, proc2 owns 2 rows. 
  This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another SeqAIJ matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2.
i.e we are using 12+15+10=37 storage locations to store 4500 34 values. 4501 4502 When d_nnz, o_nnz parameters are specified, the storage is specified 4503 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4504 In the above case the values for d_nnz,o_nnz are 4505 .vb 4506 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4507 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4508 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4509 .ve 4510 Here the space allocated is sum of all the above values i.e 34, and 4511 hence pre-allocation is perfect. 4512 4513 Level: intermediate 4514 4515 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4516 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4517 @*/ 4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4519 { 4520 PetscMPIInt size; 4521 4522 PetscFunctionBegin; 4523 PetscCall(MatCreate(comm, A)); 4524 PetscCall(MatSetSizes(*A, m, n, M, N)); 4525 PetscCallMPI(MPI_Comm_size(comm, &size)); 4526 if (size > 1) { 4527 PetscCall(MatSetType(*A, MATMPIAIJ)); 4528 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4529 } else { 4530 PetscCall(MatSetType(*A, MATSEQAIJ)); 4531 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4532 } 4533 PetscFunctionReturn(PETSC_SUCCESS); 4534 } 4535 4536 /*MC 4537 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4538 4539 Synopsis: 4540 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4541 4542 Not Collective 4543 4544 Input Parameter: 4545 . A - the `MATMPIAIJ` matrix 4546 4547 Output Parameters: 4548 + Ad - the diagonal portion of the matrix 4549 . Ao - the off diagonal portion of the matrix 4550 . 
colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4551 - ierr - error code 4552 4553 Level: advanced 4554 4555 Note: 4556 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4557 4558 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4559 M*/ 4560 4561 /*MC 4562 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4563 4564 Synopsis: 4565 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4566 4567 Not Collective 4568 4569 Input Parameters: 4570 + A - the `MATMPIAIJ` matrix 4571 . Ad - the diagonal portion of the matrix 4572 . Ao - the off diagonal portion of the matrix 4573 . colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4574 - ierr - error code 4575 4576 Level: advanced 4577 4578 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4579 M*/ 4580 4581 /*@C 4582 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4583 4584 Not collective 4585 4586 Input Parameter: 4587 . A - The `MATMPIAIJ` matrix 4588 4589 Output Parameters: 4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4593 4594 Level: intermediate 4595 4596 Note: 4597 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4598 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4599 the number of nonzero columns in the local off-diagonal piece of the matrix A. 
The array colmap maps these 4600 local column numbers to global column numbers in the original matrix. 4601 4602 Fortran Note: 4603 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4604 4605 .seealso: `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4606 @*/ 4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4608 { 4609 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4610 PetscBool flg; 4611 4612 PetscFunctionBegin; 4613 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4614 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4615 if (Ad) *Ad = a->A; 4616 if (Ao) *Ao = a->B; 4617 if (colmap) *colmap = a->garray; 4618 PetscFunctionReturn(PETSC_SUCCESS); 4619 } 4620 4621 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4622 { 4623 PetscInt m, N, i, rstart, nnz, Ii; 4624 PetscInt *indx; 4625 PetscScalar *values; 4626 MatType rootType; 4627 4628 PetscFunctionBegin; 4629 PetscCall(MatGetSize(inmat, &m, &N)); 4630 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4631 PetscInt *dnz, *onz, sum, bs, cbs; 4632 4633 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4634 /* Check sum(n) = N */ 4635 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4636 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4637 4638 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4639 rstart -= m; 4640 4641 MatPreallocateBegin(comm, m, n, dnz, onz); 4642 for (i = 0; i < m; i++) { 4643 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4644 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4645 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4646 } 4647 4648 PetscCall(MatCreate(comm, outmat)); 4649 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4650 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4651 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4652 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4653 PetscCall(MatSetType(*outmat, rootType)); 4654 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4655 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4656 MatPreallocateEnd(dnz, onz); 4657 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4658 } 4659 4660 /* numeric phase */ 4661 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4662 for (i = 0; i < m; i++) { 4663 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4664 Ii = i + rstart; 4665 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4666 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4667 } 4668 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4669 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4670 PetscFunctionReturn(PETSC_SUCCESS); 4671 } 4672 4673 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4674 { 4675 PetscMPIInt rank; 4676 PetscInt m, N, i, rstart, nnz; 4677 size_t len; 4678 const PetscInt *indx; 4679 PetscViewer out; 4680 char *name; 4681 Mat B; 4682 const PetscScalar *values; 4683 4684 PetscFunctionBegin; 4685 PetscCall(MatGetLocalSize(A, &m, NULL)); 4686 PetscCall(MatGetSize(A, NULL, &N)); 4687 /* Should this be the type of the diagonal block of A? 
*/ 4688 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4689 PetscCall(MatSetSizes(B, m, N, m, N)); 4690 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4691 PetscCall(MatSetType(B, MATSEQAIJ)); 4692 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4693 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4694 for (i = 0; i < m; i++) { 4695 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4696 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4697 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4698 } 4699 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4700 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4701 4702 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4703 PetscCall(PetscStrlen(outfile, &len)); 4704 PetscCall(PetscMalloc1(len + 6, &name)); 4705 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4706 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4707 PetscCall(PetscFree(name)); 4708 PetscCall(MatView(B, out)); 4709 PetscCall(PetscViewerDestroy(&out)); 4710 PetscCall(MatDestroy(&B)); 4711 PetscFunctionReturn(PETSC_SUCCESS); 4712 } 4713 4714 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4715 { 4716 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4717 4718 PetscFunctionBegin; 4719 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4720 PetscCall(PetscFree(merge->id_r)); 4721 PetscCall(PetscFree(merge->len_s)); 4722 PetscCall(PetscFree(merge->len_r)); 4723 PetscCall(PetscFree(merge->bi)); 4724 PetscCall(PetscFree(merge->bj)); 4725 PetscCall(PetscFree(merge->buf_ri[0])); 4726 PetscCall(PetscFree(merge->buf_ri)); 4727 PetscCall(PetscFree(merge->buf_rj[0])); 4728 PetscCall(PetscFree(merge->buf_rj)); 4729 PetscCall(PetscFree(merge->coi)); 4730 PetscCall(PetscFree(merge->coj)); 4731 PetscCall(PetscFree(merge->owners_co)); 4732 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4733 PetscCall(PetscFree(merge)); 4734 
PetscFunctionReturn(PETSC_SUCCESS); 4735 } 4736 4737 #include <../src/mat/utils/freespace.h> 4738 #include <petscbt.h> 4739 4740 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4741 { 4742 MPI_Comm comm; 4743 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4744 PetscMPIInt size, rank, taga, *len_s; 4745 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4746 PetscInt proc, m; 4747 PetscInt **buf_ri, **buf_rj; 4748 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4749 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4750 MPI_Request *s_waits, *r_waits; 4751 MPI_Status *status; 4752 const MatScalar *aa, *a_a; 4753 MatScalar **abuf_r, *ba_i; 4754 Mat_Merge_SeqsToMPI *merge; 4755 PetscContainer container; 4756 4757 PetscFunctionBegin; 4758 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4759 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4760 4761 PetscCallMPI(MPI_Comm_size(comm, &size)); 4762 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4763 4764 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4765 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4766 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4767 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4768 aa = a_a; 4769 4770 bi = merge->bi; 4771 bj = merge->bj; 4772 buf_ri = merge->buf_ri; 4773 buf_rj = merge->buf_rj; 4774 4775 PetscCall(PetscMalloc1(size, &status)); 4776 owners = merge->rowmap->range; 4777 len_s = merge->len_s; 4778 4779 /* send and recv matrix values */ 4780 /*-----------------------------*/ 4781 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4782 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4783 4784 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4785 for (proc = 0, k = 0; proc < size; proc++) { 4786 if 
(!len_s[proc]) continue; 4787 i = owners[proc]; 4788 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4789 k++; 4790 } 4791 4792 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4793 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4794 PetscCall(PetscFree(status)); 4795 4796 PetscCall(PetscFree(s_waits)); 4797 PetscCall(PetscFree(r_waits)); 4798 4799 /* insert mat values of mpimat */ 4800 /*----------------------------*/ 4801 PetscCall(PetscMalloc1(N, &ba_i)); 4802 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4803 4804 for (k = 0; k < merge->nrecv; k++) { 4805 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4806 nrows = *(buf_ri_k[k]); 4807 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4808 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4809 } 4810 4811 /* set values of ba */ 4812 m = merge->rowmap->n; 4813 for (i = 0; i < m; i++) { 4814 arow = owners[rank] + i; 4815 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4816 bnzi = bi[i + 1] - bi[i]; 4817 PetscCall(PetscArrayzero(ba_i, bnzi)); 4818 4819 /* add local non-zero vals of this proc's seqmat into ba */ 4820 anzi = ai[arow + 1] - ai[arow]; 4821 aj = a->j + ai[arow]; 4822 aa = a_a + ai[arow]; 4823 nextaj = 0; 4824 for (j = 0; nextaj < anzi; j++) { 4825 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4826 ba_i[j] += aa[nextaj++]; 4827 } 4828 } 4829 4830 /* add received vals into ba */ 4831 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4832 /* i-th row */ 4833 if (i == *nextrow[k]) { 4834 anzi = *(nextai[k] + 1) - *nextai[k]; 4835 aj = buf_rj[k] + *(nextai[k]); 4836 aa = abuf_r[k] + *(nextai[k]); 4837 nextaj = 0; 4838 for (j = 0; nextaj < anzi; j++) { 4839 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4840 
          ba_i[j] += aa[nextaj++];
        }
      }
      nextrow[k]++;
      nextai[k]++;
    }
  }
  PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
}
PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

PetscCall(PetscFree(abuf_r[0]));
PetscCall(PetscFree(abuf_r));
PetscCall(PetscFree(ba_i));
PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero structure of the
  parallel sum of the per-process sequential matrices, preallocates the MATMPIAIJ result,
  and caches the communication metadata (Mat_Merge_SeqsToMPI) on the result for reuse by
  MatCreateMPIAIJSumSeqAIJNumeric(). The returned matrix is NOT assembled.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership: rows of the result are distributed by a standard PetscLayout */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with at least one nonzero are encoded in the i-structure message */
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list used to merge sorted column index sets */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;          /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
  matrices from each processor

  Collective

  Input Parameters:
+ comm   - the communicators the parallel matrix will live on
. seqmat - the input sequential matrices
. m      - number of local rows (or `PETSC_DECIDE`)
. n      - number of local columns (or `PETSC_DECIDE`)
- scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. mpimat - the parallel matrix generated

  Level: advanced

  Note:
  The dimensions of the sequential matrix in each processor MUST be the same.
  The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* uniprocessor case: the "sum" is just the (copied) sequential matrix itself */
  if (size == 1) {
    PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
    } else {
      PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
  /* symbolic phase only on first use; numeric phase fills/refills the values */
  if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
  with `MatGetSize()`

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  Destroy the matrix with `MatDestroy()`

.seealso: `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    /* already sequential: return the same matrix with a bumped reference count */
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
  with `MatGetSize()`

  Not Collective

  Input Parameters:
+ A     - the matrix
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
  If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
  This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
  modify the values of the returned A_loc.
.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and its subtypes (type name prefix match) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block IS the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are advanced while walking; aav/bav keep the originals for the final restore */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds the diag row flanked by the off-diag entries,
       split so global column indices come out in ascending order */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (columns left of the diagonal block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already in place: only the values array is rewritten, same interleaving */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local
rows and putting them into a sequential matrix with 5324 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5325 5326 Not Collective 5327 5328 Input Parameters: 5329 + A - the matrix 5330 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5331 5332 Output Parameters: 5333 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5334 - A_loc - the local sequential matrix generated 5335 5336 Level: developer 5337 5338 Note: 5339 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5340 5341 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5342 @*/ 5343 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5344 { 5345 Mat Ao, Ad; 5346 const PetscInt *cmap; 5347 PetscMPIInt size; 5348 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5349 5350 PetscFunctionBegin; 5351 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5352 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5353 if (size == 1) { 5354 if (scall == MAT_INITIAL_MATRIX) { 5355 PetscCall(PetscObjectReference((PetscObject)Ad)); 5356 *A_loc = Ad; 5357 } else if (scall == MAT_REUSE_MATRIX) { 5358 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5359 } 5360 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5361 PetscFunctionReturn(PETSC_SUCCESS); 5362 } 5363 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5364 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5365 if (f) { 5366 PetscCall((*f)(A, scall, glob, A_loc)); 5367 } else { 5368 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5369 Mat_SeqAIJ *b = 
(Mat_SeqAIJ *)Ao->data; 5370 Mat_SeqAIJ *c; 5371 PetscInt *ai = a->i, *aj = a->j; 5372 PetscInt *bi = b->i, *bj = b->j; 5373 PetscInt *ci, *cj; 5374 const PetscScalar *aa, *ba; 5375 PetscScalar *ca; 5376 PetscInt i, j, am, dn, on; 5377 5378 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5379 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5380 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5381 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5382 if (scall == MAT_INITIAL_MATRIX) { 5383 PetscInt k; 5384 PetscCall(PetscMalloc1(1 + am, &ci)); 5385 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5386 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5387 ci[0] = 0; 5388 for (i = 0, k = 0; i < am; i++) { 5389 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5390 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5391 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5392 /* diagonal portion of A */ 5393 for (j = 0; j < ncols_d; j++, k++) { 5394 cj[k] = *aj++; 5395 ca[k] = *aa++; 5396 } 5397 /* off-diagonal portion of A */ 5398 for (j = 0; j < ncols_o; j++, k++) { 5399 cj[k] = dn + *bj++; 5400 ca[k] = *ba++; 5401 } 5402 } 5403 /* put together the new matrix */ 5404 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5405 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5406 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5407 c = (Mat_SeqAIJ *)(*A_loc)->data; 5408 c->free_a = PETSC_TRUE; 5409 c->free_ij = PETSC_TRUE; 5410 c->nonew = 0; 5411 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5412 } else if (scall == MAT_REUSE_MATRIX) { 5413 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5414 for (i = 0; i < am; i++) { 5415 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5416 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5417 /* diagonal portion of A */ 5418 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5419 /* off-diagonal portion of A */ 5420 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5421 } 5422 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5423 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5424 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5425 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5426 if (glob) { 5427 PetscInt cst, *gidx; 5428 5429 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5430 PetscCall(PetscMalloc1(dn + on, &gidx)); 5431 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5432 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5433 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5434 } 5435 } 5436 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5437 PetscFunctionReturn(PETSC_SUCCESS); 5438 } 5439 5440 /*@C 5441 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5442 5443 Not Collective 5444 5445 Input Parameters: 5446 + A - the matrix 5447 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5448 - row, col - index sets of rows and columns to extract (or NULL) 5449 5450 Output Parameter: 5451 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: owned columns plus the nonzero off-diagonal columns,
       merged in ascending global order (garray is sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero colunms for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* per-row (diag,off-diag) nonzero counts and running offsets, interleaved in pairs */
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs moves the actual column indices and values entry-by-entry */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (pd->j is shifted in place and undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place globalization of po->j */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof > 1 collapses MAIJ component columns to one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A

  Collective

  Input Parameters:
+ A     - the first matrix in `MATMPIAIJ` format
. B     - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb  - On input index sets of rows of B to extract (or NULL), modified on output
. colb  - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,
ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5807 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5808 } else { 5809 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5810 isrowb = *rowb; 5811 iscolb = *colb; 5812 PetscCall(PetscMalloc1(1, &bseq)); 5813 bseq[0] = *B_seq; 5814 } 5815 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5816 *B_seq = bseq[0]; 5817 PetscCall(PetscFree(bseq)); 5818 if (!rowb) { 5819 PetscCall(ISDestroy(&isrowb)); 5820 } else { 5821 *rowb = isrowb; 5822 } 5823 if (!colb) { 5824 PetscCall(ISDestroy(&iscolb)); 5825 } else { 5826 *colb = iscolb; 5827 } 5828 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5829 PetscFunctionReturn(PETSC_SUCCESS); 5830 } 5831 5832 /* 5833 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5834 of the OFF-DIAGONAL portion of local A 5835 5836 Collective 5837 5838 Input Parameters: 5839 + A,B - the matrices in mpiaij format 5840 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5841 5842 Output Parameter: 5843 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5844 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5845 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5846 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5847 5848 Developer Note: 5849 This directly accesses information inside the VecScatter associated with the matrix-vector product 5850 for this matrix. This is not desirable.. 
5851 5852 Level: developer 5853 5854 */ 5855 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5856 { 5857 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5858 Mat_SeqAIJ *b_oth; 5859 VecScatter ctx; 5860 MPI_Comm comm; 5861 const PetscMPIInt *rprocs, *sprocs; 5862 const PetscInt *srow, *rstarts, *sstarts; 5863 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5864 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5865 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5866 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5867 PetscMPIInt size, tag, rank, nreqs; 5868 5869 PetscFunctionBegin; 5870 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5871 PetscCallMPI(MPI_Comm_size(comm, &size)); 5872 5873 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5874 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5875 } 5876 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5877 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5878 5879 if (size == 1) { 5880 startsj_s = NULL; 5881 bufa_ptr = NULL; 5882 *B_oth = NULL; 5883 PetscFunctionReturn(PETSC_SUCCESS); 5884 } 5885 5886 ctx = a->Mvctx; 5887 tag = ((PetscObject)ctx)->tag; 5888 5889 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5890 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5891 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 
5892 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5893 PetscCall(PetscMalloc1(nreqs, &reqs)); 5894 rwaits = reqs; 5895 swaits = reqs + nrecvs; 5896 5897 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5898 if (scall == MAT_INITIAL_MATRIX) { 5899 /* i-array */ 5900 /*---------*/ 5901 /* post receives */ 5902 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5903 for (i = 0; i < nrecvs; i++) { 5904 rowlen = rvalues + rstarts[i] * rbs; 5905 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5906 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5907 } 5908 5909 /* pack the outgoing message */ 5910 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5911 5912 sstartsj[0] = 0; 5913 rstartsj[0] = 0; 5914 len = 0; /* total length of j or a array to be sent */ 5915 if (nsends) { 5916 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5917 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5918 } 5919 for (i = 0; i < nsends; i++) { 5920 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5921 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5922 for (j = 0; j < nrows; j++) { 5923 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5924 for (l = 0; l < sbs; l++) { 5925 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5926 5927 rowlen[j * sbs + l] = ncols; 5928 5929 len += ncols; 5930 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5931 } 5932 k++; 5933 } 5934 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5935 5936 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5937 } 5938 /* recvs and sends of i-array are completed */ 5939 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5940 
PetscCall(PetscFree(svalues)); 5941 5942 /* allocate buffers for sending j and a arrays */ 5943 PetscCall(PetscMalloc1(len + 1, &bufj)); 5944 PetscCall(PetscMalloc1(len + 1, &bufa)); 5945 5946 /* create i-array of B_oth */ 5947 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5948 5949 b_othi[0] = 0; 5950 len = 0; /* total length of j or a array to be received */ 5951 k = 0; 5952 for (i = 0; i < nrecvs; i++) { 5953 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5954 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5955 for (j = 0; j < nrows; j++) { 5956 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5957 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5958 k++; 5959 } 5960 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5961 } 5962 PetscCall(PetscFree(rvalues)); 5963 5964 /* allocate space for j and a arrays of B_oth */ 5965 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5966 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5967 5968 /* j-array */ 5969 /*---------*/ 5970 /* post receives of j-array */ 5971 for (i = 0; i < nrecvs; i++) { 5972 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5973 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5974 } 5975 5976 /* pack the outgoing message j-array */ 5977 if (nsends) k = sstarts[0]; 5978 for (i = 0; i < nsends; i++) { 5979 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5980 bufJ = bufj + sstartsj[i]; 5981 for (j = 0; j < nrows; j++) { 5982 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5983 for (ll = 0; ll < sbs; ll++) { 5984 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5985 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5986 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5987 } 5988 } 5989 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5990 } 
5991 5992 /* recvs and sends of j-array are completed */ 5993 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5994 } else if (scall == MAT_REUSE_MATRIX) { 5995 sstartsj = *startsj_s; 5996 rstartsj = *startsj_r; 5997 bufa = *bufa_ptr; 5998 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5999 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 6000 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 6001 6002 /* a-array */ 6003 /*---------*/ 6004 /* post receives of a-array */ 6005 for (i = 0; i < nrecvs; i++) { 6006 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6007 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6008 } 6009 6010 /* pack the outgoing message a-array */ 6011 if (nsends) k = sstarts[0]; 6012 for (i = 0; i < nsends; i++) { 6013 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6014 bufA = bufa + sstartsj[i]; 6015 for (j = 0; j < nrows; j++) { 6016 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6017 for (ll = 0; ll < sbs; ll++) { 6018 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6019 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6020 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6021 } 6022 } 6023 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6024 } 6025 /* recvs and sends of a-array are completed */ 6026 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6027 PetscCall(PetscFree(reqs)); 6028 6029 if (scall == MAT_INITIAL_MATRIX) { 6030 /* put together the new matrix */ 6031 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6032 6033 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6034 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6035 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6036 b_oth->free_a = PETSC_TRUE; 6037 b_oth->free_ij = PETSC_TRUE; 6038 b_oth->nonew = 0; 6039 6040 PetscCall(PetscFree(bufj)); 6041 if (!startsj_s || !bufa_ptr) { 6042 PetscCall(PetscFree2(sstartsj, rstartsj)); 6043 PetscCall(PetscFree(bufa_ptr)); 6044 } else { 6045 *startsj_s = sstartsj; 6046 *startsj_r = rstartsj; 6047 *bufa_ptr = bufa; 6048 } 6049 } else if (scall == MAT_REUSE_MATRIX) { 6050 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6051 } 6052 6053 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6054 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6055 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6056 PetscFunctionReturn(PETSC_SUCCESS); 6057 } 6058 6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6062 #if defined(PETSC_HAVE_MKL_SPARSE) 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6064 #endif 6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6067 #if defined(PETSC_HAVE_ELEMENTAL) 6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6069 #endif 6070 #if defined(PETSC_HAVE_SCALAPACK) 6071 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6072 #endif 6073 #if defined(PETSC_HAVE_HYPRE) 6074 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6075 #endif 6076 #if defined(PETSC_HAVE_CUDA) 6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n            p           p
  [       ]   [       ]   [       ]
m [   A   ] * [   B   ] = [   C   ]
  [       ] n [       ] m [       ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form the two transposes, multiply them in the supported (AIJ*Dense) order, ... */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* ... then transpose the product back into the caller-provided C in place */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already one of the dense variants; otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  /* Only the AB product is wired up for MPIDense*MPIAIJ; other product types fall through untouched */
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats.
jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

  Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros of Set1, Set2 and the merged respectively */
  i[0]        = 0; /* CSR of the merged matrix starts at 0 */
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-pointer merge over the sorted unique columns of both sets; b1/b2 advance
       by the repeat count (from jmap) of each unique entry */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* row pointer of the merged CSR */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      /* NOTE(review): '<= mat->cmap->N' admits N itself, but valid global columns are [0,N-1] — confirm the intended bound */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz;                    /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) {     /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0]; /* positions before the first mapped nonzero get jmap[0] (= 0) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6411 { 6412 MPI_Comm comm; 6413 PetscMPIInt rank, size; 6414 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6415 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6416 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6417 6418 PetscFunctionBegin; 6419 PetscCall(PetscFree(mpiaij->garray)); 6420 PetscCall(VecDestroy(&mpiaij->lvec)); 6421 #if defined(PETSC_USE_CTABLE) 6422 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6423 #else 6424 PetscCall(PetscFree(mpiaij->colmap)); 6425 #endif 6426 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6427 mat->assembled = PETSC_FALSE; 6428 mat->was_assembled = PETSC_FALSE; 6429 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6430 6431 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6432 PetscCallMPI(MPI_Comm_size(comm, &size)); 6433 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6434 PetscCall(PetscLayoutSetUp(mat->rmap)); 6435 PetscCall(PetscLayoutSetUp(mat->cmap)); 6436 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6437 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6438 PetscCall(MatGetLocalSize(mat, &m, &n)); 6439 PetscCall(MatGetSize(mat, &M, &N)); 6440 6441 /* ---------------------------------------------------------------------------*/ 6442 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6443 /* entries come first, then local rows, then remote rows. 
*/ 6444 /* ---------------------------------------------------------------------------*/ 6445 PetscCount n1 = coo_n, *perm1; 6446 PetscInt *i1 = coo_i, *j1 = coo_j; 6447 6448 PetscCall(PetscMalloc1(n1, &perm1)); 6449 for (k = 0; k < n1; k++) perm1[k] = k; 6450 6451 /* Manipulate indices so that entries with negative row or col indices will have smallest 6452 row indices, local entries will have greater but negative row indices, and remote entries 6453 will have positive row indices. 6454 */ 6455 for (k = 0; k < n1; k++) { 6456 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6457 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6458 else { 6459 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6460 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6461 } 6462 } 6463 6464 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6465 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6466 for (k = 0; k < n1; k++) { 6467 if (i1[k] > PETSC_MIN_INT) break; 6468 } /* Advance k to the first entry we need to take care of */ 6469 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6470 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6471 6472 /* ---------------------------------------------------------------------------*/ 6473 /* Split local rows into diag/offdiag portions */ 6474 /* ---------------------------------------------------------------------------*/ 6475 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6476 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6477 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6478 
6479 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6480 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6481 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6482 6483 /* ---------------------------------------------------------------------------*/ 6484 /* Send remote rows to their owner */ 6485 /* ---------------------------------------------------------------------------*/ 6486 /* Find which rows should be sent to which remote ranks*/ 6487 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6488 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6489 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6490 const PetscInt *ranges; 6491 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6492 6493 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6494 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6495 for (k = rem; k < n1;) { 6496 PetscMPIInt owner; 6497 PetscInt firstRow, lastRow; 6498 6499 /* Locate a row range */ 6500 firstRow = i1[k]; /* first row of this owner */ 6501 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6502 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6503 6504 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6505 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6506 6507 /* All entries in [k,p) belong to this remote owner */ 6508 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6509 PetscMPIInt *sendto2; 6510 PetscInt *nentries2; 6511 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6512 6513 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6514 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6515 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6516 PetscCall(PetscFree2(sendto, nentries2)); 6517 sendto = sendto2; 6518 nentries = nentries2; 6519 maxNsend = maxNsend2; 6520 } 6521 sendto[nsend] = owner; 6522 nentries[nsend] = p - k; 6523 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6524 nsend++; 6525 k = p; 6526 } 6527 6528 /* Build 1st SF to know offsets on remote to send data */ 6529 PetscSF sf1; 6530 PetscInt nroots = 1, nroots2 = 0; 6531 PetscInt nleaves = nsend, nleaves2 = 0; 6532 PetscInt *offsets; 6533 PetscSFNode *iremote; 6534 6535 PetscCall(PetscSFCreate(comm, &sf1)); 6536 PetscCall(PetscMalloc1(nsend, &iremote)); 6537 PetscCall(PetscMalloc1(nsend, &offsets)); 6538 for (k = 0; k < nsend; k++) { 6539 iremote[k].rank = sendto[k]; 6540 iremote[k].index = 0; 6541 nleaves2 += nentries[k]; 6542 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6543 } 6544 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6545 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6546 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6547 PetscCall(PetscSFDestroy(&sf1)); 6548 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6549 6550 /* Build 2nd SF to send remote COOs to their owner */ 6551 PetscSF sf2; 6552 nroots = nroots2; 6553 nleaves = nleaves2; 6554 PetscCall(PetscSFCreate(comm, &sf2)); 6555 
PetscCall(PetscSFSetFromOptions(sf2)); 6556 PetscCall(PetscMalloc1(nleaves, &iremote)); 6557 p = 0; 6558 for (k = 0; k < nsend; k++) { 6559 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6560 for (q = 0; q < nentries[k]; q++, p++) { 6561 iremote[p].rank = sendto[k]; 6562 iremote[p].index = offsets[k] + q; 6563 } 6564 } 6565 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6566 6567 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6568 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6569 6570 /* Send the remote COOs to their owner */ 6571 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6572 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6573 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6574 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6575 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6576 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6577 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6578 6579 PetscCall(PetscFree(offsets)); 6580 PetscCall(PetscFree2(sendto, nentries)); 6581 6582 /* ---------------------------------------------------------------*/ 6583 /* Sort received COOs by row along with the permutation array */ 6584 /* ---------------------------------------------------------------*/ 6585 for (k = 0; k < n2; k++) perm2[k] = k; 6586 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6587 6588 /* ---------------------------------------------------------------*/ 6589 /* 
Split received COOs into diag/offdiag portions */ 6590 /* ---------------------------------------------------------------*/ 6591 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6592 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6593 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6594 6595 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6596 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6597 6598 /* --------------------------------------------------------------------------*/ 6599 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6600 /* --------------------------------------------------------------------------*/ 6601 PetscInt *Ai, *Bi; 6602 PetscInt *Aj, *Bj; 6603 6604 PetscCall(PetscMalloc1(m + 1, &Ai)); 6605 PetscCall(PetscMalloc1(m + 1, &Bi)); 6606 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6607 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6608 6609 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6610 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6611 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6612 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6613 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6614 6615 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6616 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6617 6618 /* --------------------------------------------------------------------------*/ 6619 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6620 /* expect nonzeros in A/B most likely have local contributing entries */ 6621 /* --------------------------------------------------------------------------*/ 6622 PetscInt Annz = Ai[m]; 6623 PetscInt Bnnz = Bi[m]; 6624 
PetscCount *Ajmap1_new, *Bjmap1_new; 6625 6626 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6627 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6628 6629 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6630 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6631 6632 PetscCall(PetscFree(Aimap1)); 6633 PetscCall(PetscFree(Ajmap1)); 6634 PetscCall(PetscFree(Bimap1)); 6635 PetscCall(PetscFree(Bjmap1)); 6636 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6637 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6638 PetscCall(PetscFree(perm1)); 6639 PetscCall(PetscFree3(i2, j2, perm2)); 6640 6641 Ajmap1 = Ajmap1_new; 6642 Bjmap1 = Bjmap1_new; 6643 6644 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6645 if (Annz < Annz1 + Annz2) { 6646 PetscInt *Aj_new; 6647 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6648 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6649 PetscCall(PetscFree(Aj)); 6650 Aj = Aj_new; 6651 } 6652 6653 if (Bnnz < Bnnz1 + Bnnz2) { 6654 PetscInt *Bj_new; 6655 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6656 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6657 PetscCall(PetscFree(Bj)); 6658 Bj = Bj_new; 6659 } 6660 6661 /* --------------------------------------------------------------------------------*/ 6662 /* Create new submatrices for on-process and off-process coupling */ 6663 /* --------------------------------------------------------------------------------*/ 6664 PetscScalar *Aa, *Ba; 6665 MatType rtype; 6666 Mat_SeqAIJ *a, *b; 6667 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6668 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6669 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6670 if (cstart) { 6671 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6672 } 6673 PetscCall(MatDestroy(&mpiaij->A)); 6674 PetscCall(MatDestroy(&mpiaij->B)); 6675 PetscCall(MatGetRootType_Private(mat, &rtype)); 6676 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6677 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6678 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6679 6680 a = (Mat_SeqAIJ *)mpiaij->A->data; 6681 b = (Mat_SeqAIJ *)mpiaij->B->data; 6682 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6683 a->free_a = b->free_a = PETSC_TRUE; 6684 a->free_ij = b->free_ij = PETSC_TRUE; 6685 6686 /* conversion must happen AFTER multiply setup */ 6687 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6688 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6689 PetscCall(VecDestroy(&mpiaij->lvec)); 6690 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6691 6692 mpiaij->coo_n = coo_n; 6693 mpiaij->coo_sf = sf2; 6694 mpiaij->sendlen = nleaves; 6695 mpiaij->recvlen = nroots; 6696 6697 mpiaij->Annz = Annz; 6698 mpiaij->Bnnz = Bnnz; 6699 6700 mpiaij->Annz2 = Annz2; 6701 mpiaij->Bnnz2 = Bnnz2; 6702 6703 mpiaij->Atot1 = Atot1; 6704 mpiaij->Atot2 = Atot2; 6705 mpiaij->Btot1 = Btot1; 6706 mpiaij->Btot2 = Btot2; 6707 6708 mpiaij->Ajmap1 = Ajmap1; 6709 mpiaij->Aperm1 = Aperm1; 6710 6711 mpiaij->Bjmap1 = Bjmap1; 6712 mpiaij->Bperm1 = Bperm1; 6713 6714 mpiaij->Aimap2 = Aimap2; 6715 mpiaij->Ajmap2 = Ajmap2; 6716 mpiaij->Aperm2 = Aperm2; 6717 6718 mpiaij->Bimap2 = Bimap2; 6719 mpiaij->Bjmap2 = Bjmap2; 6720 mpiaij->Bperm2 = Bperm2; 6721 6722 mpiaij->Cperm1 = Cperm1; 6723 6724 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6725 PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf)); 6726 PetscFunctionReturn(PETSC_SUCCESS); 6727 } 6728 6729 /* MatSetValuesCOO_MPIAIJ - add/insert the COO values v[] (ordered as the indices given to MatSetPreallocationCOO) into the diag (A) and offdiag (B) blocks. Local entries are accumulated through the Ajmap1/Aperm1 and Bjmap1/Bperm1 maps built during preallocation; remote entries are packed via Cperm1, shipped to their owners through coo_sf (communication overlapped with the local summation), then accumulated through the Aimap2/Ajmap2/Aperm2 and Bimap2/Bjmap2/Bperm2 maps. imode selects overwriting (INSERT_VALUES) or accumulating into (ADD_VALUES) the existing matrix values. */ static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6730 { 6731 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6732 Mat A = mpiaij->A, B = mpiaij->B; 6733 PetscCount Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2; 6734 PetscScalar *Aa, *Ba; 6735 PetscScalar *sendbuf = mpiaij->sendbuf; 6736 PetscScalar *recvbuf = mpiaij->recvbuf; 6737 const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2; 6738 const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2; 6739 const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2; 6740 const PetscCount *Cperm1 = mpiaij->Cperm1; 6741 6742 PetscFunctionBegin; 6743 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6744 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6745 6746 /* Pack entries to be sent to remote */ 6747 for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6748 6749 /* Send remote entries to their owner and overlap the communication with local computation */ 6750 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6751 /* Add local entries to A and B */ 6752 for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6753 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6754 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6755 Aa[i] = (imode == INSERT_VALUES ?
0.0 : Aa[i]) + sum; 6756 } 6757 for (PetscCount i = 0; i < Bnnz; i++) { 6758 PetscScalar sum = 0.0; 6759 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6760 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6761 } 6762 /* Finish receiving the off-process entries into recvbuf before touching them below */ PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6763 6764 /* Add received remote entries to A and B */ 6765 for (PetscCount i = 0; i < Annz2; i++) { /* Aimap2[i] is the position in Aa[] of the i-th remotely-contributed nonzero */ 6766 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6767 } 6768 for (PetscCount i = 0; i < Bnnz2; i++) { 6769 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6770 } 6771 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6772 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6773 PetscFunctionReturn(PETSC_SUCCESS); 6774 } 6775 6776 /* ----------------------------------------------------------------*/ 6777 6778 /*MC 6779 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6780 6781 Options Database Keys: 6782 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6783 6784 Level: beginner 6785 6786 Notes: 6787 `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values, 6788 in this case the values associated with the rows and columns one passes in are set to zero 6789 in the matrix 6790 6791 `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this case, no 6792 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6793 6794 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6795 M*/ 6796 6797 /* MatCreate_MPIAIJ - type constructor for MATMPIAIJ: installs the shared ops table (MatOps_Values), creates the stash used to cache off-process entries set with MatSetValues(), zero-initializes the per-rank fields (colmap, garray, lvec, Mvctx, MatGetRow() scratch, spptr), and composes the "..._C" plugin functions (preallocation, conversions, products, COO assembly) that the rest of PETSc looks up by name. */ PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6798 { 6799 Mat_MPIAIJ *b; 6800 PetscMPIInt size; 6801 6802 PetscFunctionBegin; 6803 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6804 6805 PetscCall(PetscNew(&b)); 6806 B->data = (void *)b; 6807 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6808 B->assembled = PETSC_FALSE; 6809 B->insertmode = NOT_SET_VALUES; 6810 b->size = size; 6811 6812 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6813 6814 /* build cache for off array entries formed */ 6815 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6816 6817 b->donotstash = PETSC_FALSE; 6818 b->colmap = NULL; 6819 b->garray = NULL; 6820 b->roworiented = PETSC_TRUE; 6821 6822 /* stuff used for matrix vector multiply */ 6823 b->lvec = NULL; 6824 b->Mvctx = NULL; 6825 6826 /* stuff for MatGetRow() */ 6827 b->rowindices = NULL; 6828 b->rowvalues = NULL; 6829 b->getrowactive = PETSC_FALSE; 6830 6831 /* flexible pointer used in CUSPARSE classes */ 6832 b->spptr = NULL; 6833 6834 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6837 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6840
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6844 #if defined(PETSC_HAVE_CUDA) 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6846 #endif 6847 #if defined(PETSC_HAVE_HIP) 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6849 #endif 6850 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6852 #endif 6853 #if defined(PETSC_HAVE_MKL_SPARSE) 6854 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6855 #endif 6856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6860 #if defined(PETSC_HAVE_ELEMENTAL) 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6862 #endif 6863 #if defined(PETSC_HAVE_SCALAPACK) 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", 
MatConvert_AIJ_ScaLAPACK)); 6865 #endif 6866 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6867 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6868 #if defined(PETSC_HAVE_HYPRE) 6869 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6870 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6871 #endif 6872 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6873 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6875 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6876 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6877 PetscFunctionReturn(PETSC_SUCCESS); 6878 } 6879 6880 /*@C 6881 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6882 and "off-diagonal" part of the matrix in CSR format. 6883 6884 Collective 6885 6886 Input Parameters: 6887 + comm - MPI communicator 6888 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6889 . n - This value should be the same as the local size used in creating the 6890 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6891 calculated if N is given) For square matrices n is almost always m. 6892 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 6893 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 6894 . 
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6895 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6896 . a - matrix values 6897 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6898 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6899 - oa - matrix values 6900 6901 Output Parameter: 6902 . mat - the matrix 6903 6904 Level: advanced 6905 6906 Notes: 6907 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6908 must free the arrays once the matrix has been destroyed and not before. 6909 6910 The i and j indices are 0 based 6911 6912 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6913 6914 This sets local rows and cannot be used to set off-processor values. 6915 6916 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6917 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6918 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6919 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6920 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6921 communication if it is known that only local entries will be set. 
6922 6923 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6924 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6925 @*/ 6926 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6927 { 6928 Mat_MPIAIJ *maij; 6929 6930 PetscFunctionBegin; 6931 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6932 /* NOTE(review): i[0] and oi[0] are read unconditionally, so both CSR row arrays must be valid (length m+1) even when m == 0 — confirm callers */ PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6933 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6934 PetscCall(MatCreate(comm, mat)); 6935 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6936 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6937 maij = (Mat_MPIAIJ *)(*mat)->data; 6938 6939 /* the user-supplied split CSR arrays stand in for preallocation */ (*mat)->preallocated = PETSC_TRUE; 6940 6941 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6942 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6943 6944 /* wrap (do not copy) the user's arrays as the diagonal (A) and off-diagonal (B) sequential blocks */ PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6945 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6946 6947 /* all entries are local by construction, so assemble with off-process communication disabled, then restore the option */ PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6948 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6949 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6950 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6951 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6952 PetscFunctionReturn(PETSC_SUCCESS); 6953 } 6954 6955 typedef struct { 6956 Mat *mp; /* intermediate products */ 6957 PetscBool *mptmp; /* is the intermediate product temporary ?
*/ 6958 PetscInt cp; /* number of intermediate products */ 6959 6960 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6961 PetscInt *startsj_s, *startsj_r; 6962 PetscScalar *bufa; 6963 Mat P_oth; 6964 6965 /* may take advantage of merging product->B */ 6966 Mat Bloc; /* B-local by merging diag and off-diag */ 6967 6968 /* cusparse does not have support to split between symbolic and numeric phases. 6969 When api_user is true, we don't need to update the numerical values 6970 of the temporary storage */ 6971 PetscBool reusesym; 6972 6973 /* support for COO values insertion */ 6974 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6975 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6976 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6977 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6978 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6979 PetscMemType mtype; 6980 6981 /* customization */ 6982 PetscBool abmerge; 6983 PetscBool P_oth_bind; 6984 } MatMatMPIAIJBACKEND; 6985 6986 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6987 { 6988 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6989 PetscInt i; 6990 6991 PetscFunctionBegin; 6992 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6993 PetscCall(PetscFree(mmdata->bufa)); 6994 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6995 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6996 PetscCall(MatDestroy(&mmdata->P_oth)); 6997 PetscCall(MatDestroy(&mmdata->Bloc)); 6998 PetscCall(PetscSFDestroy(&mmdata->sf)); 6999 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7000 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7001 PetscCall(PetscFree(mmdata->own[0])); 7002 PetscCall(PetscFree(mmdata->own)); 7003 
PetscCall(PetscFree(mmdata->off[0])); /* off[0] apparently holds the single backing allocation for all off[i] index arrays, mirroring own[0] — freed once here */ 7004 PetscCall(PetscFree(mmdata->off)); 7005 PetscCall(PetscFree(mmdata)); 7006 PetscFunctionReturn(PETSC_SUCCESS); 7007 } 7008 7009 /* Copy selected n entries with indices in idx[] of A to v[]. 7010 If idx is NULL, copy the whole data array of A to v[] 7011 */ 7012 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7013 { 7014 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7015 7016 PetscFunctionBegin; 7017 /* prefer an implementation composed on A by name (presumably provided by device backends — verify), falling back to a host copy */ PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7018 if (f) { 7019 PetscCall((*f)(A, n, idx, v)); 7020 } else { 7021 const PetscScalar *vv; 7022 7023 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7024 if (n && idx) { /* gather: v[j] = vv[idx[j]] */ 7025 PetscScalar *w = v; 7026 const PetscInt *oi = idx; 7027 PetscInt j; 7028 7029 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7030 } else { 7031 PetscCall(PetscArraycpy(v, vv, n)); 7032 } 7033 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7034 } 7035 PetscFunctionReturn(PETSC_SUCCESS); 7036 } 7037 7038 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7039 { 7040 MatMatMPIAIJBACKEND *mmdata; 7041 PetscInt i, n_d, n_o; 7042 7043 PetscFunctionBegin; 7044 MatCheckProduct(C, 1); 7045 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7046 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7047 if (!mmdata->reusesym) { /* update temporary matrices */ 7048 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7049 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7050 } 7051 mmdata->reusesym = PETSC_FALSE; 7052 7053 for (i = 0; i < mmdata->cp; i++) { 7054 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing
numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7055 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7056 } 7057 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7058 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7059 7060 if (mmdata->mptmp[i]) continue; 7061 if (noff) { 7062 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7063 7064 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7065 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7066 n_o += noff; 7067 n_d += nown; 7068 } else { 7069 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7070 7071 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7072 n_d += mm->nz; 7073 } 7074 } 7075 if (mmdata->hasoffproc) { /* offprocess insertion */ 7076 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7077 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7078 } 7079 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7080 PetscFunctionReturn(PETSC_SUCCESS); 7081 } 7082 7083 /* Support for Pt * A, A * P, or Pt * A * P */ 7084 #define MAX_NUMBER_INTERMEDIATE 4 7085 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7086 { 7087 Mat_Product *product = C->product; 7088 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7089 Mat_MPIAIJ *a, *p; 7090 MatMatMPIAIJBACKEND *mmdata; 7091 ISLocalToGlobalMapping P_oth_l2g = NULL; 7092 IS glob = NULL; 7093 const char *prefix; 7094 char pprefix[256]; 7095 const PetscInt *globidx, *P_oth_idx; 7096 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7097 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7098 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7099 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7100 /* a base offset; type-2: sparse with a local to global map table */ 7101 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7102 7103 MatProductType ptype; 7104 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7105 PetscMPIInt size; 7106 7107 PetscFunctionBegin; 7108 MatCheckProduct(C, 1); 7109 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7110 ptype = product->type; 7111 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7112 ptype = MATPRODUCT_AB; 7113 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7114 } 7115 switch (ptype) { 7116 case MATPRODUCT_AB: 7117 A = product->A; 7118 P = product->B; 7119 m = A->rmap->n; 7120 n = P->cmap->n; 7121 M = A->rmap->N; 7122 N = P->cmap->N; 7123 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7124 break; 7125 case MATPRODUCT_AtB: 7126 P = product->A; 7127 A = product->B; 7128 m = P->cmap->n; 7129 n = A->cmap->n; 7130 M = P->cmap->N; 7131 N = A->cmap->N; 7132 hasoffproc = PETSC_TRUE; 7133 break; 7134 case MATPRODUCT_PtAP: 7135 A = product->A; 7136 P = product->B; 7137 m = P->cmap->n; 7138 n = P->cmap->n; 7139 M = P->cmap->N; 7140 N = P->cmap->N; 7141 hasoffproc = PETSC_TRUE; 7142 break; 7143 default: 7144 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7145 } 7146 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7147 if (size == 1) hasoffproc = PETSC_FALSE; 7148 7149 /* defaults */ 7150 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7151 mp[i] = NULL; 7152 mptmp[i] = PETSC_FALSE; 7153 rmapt[i] = -1; 7154 cmapt[i] = -1; 7155 rmapa[i] = NULL; 7156 cmapa[i] = NULL; 7157 } 7158 7159 /* customization */ 
7160 PetscCall(PetscNew(&mmdata)); 7161 mmdata->reusesym = product->api_user; 7162 if (ptype == MATPRODUCT_AB) { 7163 if (product->api_user) { 7164 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7165 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7166 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7167 PetscOptionsEnd(); 7168 } else { 7169 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7170 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7171 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7172 PetscOptionsEnd(); 7173 } 7174 } else if (ptype == MATPRODUCT_PtAP) { 7175 if (product->api_user) { 7176 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7177 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7178 PetscOptionsEnd(); 7179 } else { 7180 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7181 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7182 PetscOptionsEnd(); 7183 } 7184 } 7185 a = (Mat_MPIAIJ *)A->data; 7186 p = (Mat_MPIAIJ *)P->data; 7187 PetscCall(MatSetSizes(C, m, n, M, N)); 7188 PetscCall(PetscLayoutSetUp(C->rmap)); 7189 PetscCall(PetscLayoutSetUp(C->cmap)); 7190 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7191 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7192 7193 cp = 0; 7194 switch (ptype) { 7195 case MATPRODUCT_AB: /* A * P */ 7196 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7197 7198 /* A_diag * P_local (merged or not) */ 7199 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7200 /* P is product->B */ 7201 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7202 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7203 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7204 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7205 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7206 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7207 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7208 mp[cp]->product->api_user = product->api_user; 7209 PetscCall(MatProductSetFromOptions(mp[cp])); 7210 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7211 PetscCall(ISGetIndices(glob, &globidx)); 7212 rmapt[cp] = 1; 7213 cmapt[cp] = 2; 7214 cmapa[cp] = globidx; 7215 mptmp[cp] = PETSC_FALSE; 7216 cp++; 7217 } else { /* A_diag * P_diag and A_diag * P_off */ 7218 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7219 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7220 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7221 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7222 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7223 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7224 mp[cp]->product->api_user = product->api_user; 7225 PetscCall(MatProductSetFromOptions(mp[cp])); 7226 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7227 rmapt[cp] = 1; 7228 cmapt[cp] = 1; 7229 mptmp[cp] = PETSC_FALSE; 7230 cp++; 7231 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7232 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7233 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7234 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7235 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7236 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7237 mp[cp]->product->api_user = product->api_user; 7238 PetscCall(MatProductSetFromOptions(mp[cp])); 7239 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7240 rmapt[cp] = 1; 7241 cmapt[cp] = 2; 7242 cmapa[cp] = p->garray; 7243 mptmp[cp] = PETSC_FALSE; 7244 cp++; 7245 } 7246 7247 /* A_off * P_other */ 7248 if (mmdata->P_oth) { 7249 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7250 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7251 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7252 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7253 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7254 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7255 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7256 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7257 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7258 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7259 mp[cp]->product->api_user = product->api_user; 7260 PetscCall(MatProductSetFromOptions(mp[cp])); 7261 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7262 rmapt[cp] = 1; 7263 cmapt[cp] = 2; 7264 cmapa[cp] = P_oth_idx; 7265 mptmp[cp] = PETSC_FALSE; 7266 cp++; 7267 } 7268 break; 7269 7270 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7271 /* A is product->B */ 7272 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7273 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7274 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7275 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7276 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7277 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7278 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7279 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7280 mp[cp]->product->api_user = product->api_user; 7281 PetscCall(MatProductSetFromOptions(mp[cp])); 7282 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7283 PetscCall(ISGetIndices(glob, &globidx)); 7284 rmapt[cp] = 2; 7285 rmapa[cp] = globidx; 7286 cmapt[cp] = 2; 7287 cmapa[cp] = globidx; 7288 mptmp[cp] = PETSC_FALSE; 7289 cp++; 7290 } else { 7291 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7292 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7293 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7294 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7295 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7296 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7297 mp[cp]->product->api_user = product->api_user; 7298 PetscCall(MatProductSetFromOptions(mp[cp])); 7299 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7300 PetscCall(ISGetIndices(glob, &globidx)); 7301 rmapt[cp] = 1; 7302 cmapt[cp] = 2; 7303 cmapa[cp] = globidx; 7304 mptmp[cp] = PETSC_FALSE; 7305 cp++; 7306 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7307 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7308 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7309 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7310 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7311 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7312 mp[cp]->product->api_user = product->api_user; 7313 PetscCall(MatProductSetFromOptions(mp[cp])); 7314 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7315 rmapt[cp] = 2; 7316 rmapa[cp] = p->garray; 
7317 cmapt[cp] = 2; 7318 cmapa[cp] = globidx; 7319 mptmp[cp] = PETSC_FALSE; 7320 cp++; 7321 } 7322 break; 7323 case MATPRODUCT_PtAP: 7324 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7325 /* P is product->B */ 7326 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7327 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7328 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7329 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7330 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7331 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7332 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7333 mp[cp]->product->api_user = product->api_user; 7334 PetscCall(MatProductSetFromOptions(mp[cp])); 7335 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7336 PetscCall(ISGetIndices(glob, &globidx)); 7337 rmapt[cp] = 2; 7338 rmapa[cp] = globidx; 7339 cmapt[cp] = 2; 7340 cmapa[cp] = globidx; 7341 mptmp[cp] = PETSC_FALSE; 7342 cp++; 7343 if (mmdata->P_oth) { 7344 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7345 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7346 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7347 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7348 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7349 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7350 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7351 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7352 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7353 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7354 mp[cp]->product->api_user = product->api_user; 7355 PetscCall(MatProductSetFromOptions(mp[cp])); 7356 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7357 
mptmp[cp] = PETSC_TRUE; 7358 cp++; 7359 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7360 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7361 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7362 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7363 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7364 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7365 mp[cp]->product->api_user = product->api_user; 7366 PetscCall(MatProductSetFromOptions(mp[cp])); 7367 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7368 rmapt[cp] = 2; 7369 rmapa[cp] = globidx; 7370 cmapt[cp] = 2; 7371 cmapa[cp] = P_oth_idx; 7372 mptmp[cp] = PETSC_FALSE; 7373 cp++; 7374 } 7375 break; 7376 default: 7377 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7378 } 7379 /* sanity check */ 7380 if (size > 1) 7381 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7382 7383 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7384 for (i = 0; i < cp; i++) { 7385 mmdata->mp[i] = mp[i]; 7386 mmdata->mptmp[i] = mptmp[i]; 7387 } 7388 mmdata->cp = cp; 7389 C->product->data = mmdata; 7390 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7391 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7392 7393 /* memory type */ 7394 mmdata->mtype = PETSC_MEMTYPE_HOST; 7395 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7396 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7397 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7398 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7399 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7400 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7401 
7402 /* prepare coo coordinates for values insertion */ 7403 7404 /* count total nonzeros of those intermediate seqaij Mats 7405 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7406 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7407 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7408 */ 7409 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7410 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7411 if (mptmp[cp]) continue; 7412 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7413 const PetscInt *rmap = rmapa[cp]; 7414 const PetscInt mr = mp[cp]->rmap->n; 7415 const PetscInt rs = C->rmap->rstart; 7416 const PetscInt re = C->rmap->rend; 7417 const PetscInt *ii = mm->i; 7418 for (i = 0; i < mr; i++) { 7419 const PetscInt gr = rmap[i]; 7420 const PetscInt nz = ii[i + 1] - ii[i]; 7421 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7422 else ncoo_oown += nz; /* this row is local */ 7423 } 7424 } else ncoo_d += mm->nz; 7425 } 7426 7427 /* 7428 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7429 7430 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7431 7432 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7433 7434 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7435 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7436 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7437 7438 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7439 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7440 */ 7441 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7442 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7443 7444 /* gather (i,j) of nonzeros inserted by remote procs */ 7445 if (hasoffproc) { 7446 PetscSF msf; 7447 PetscInt ncoo2, *coo_i2, *coo_j2; 7448 7449 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7450 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7451 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7452 7453 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7454 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7455 PetscInt *idxoff = mmdata->off[cp]; 7456 PetscInt *idxown = mmdata->own[cp]; 7457 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7458 const PetscInt *rmap = rmapa[cp]; 7459 const PetscInt *cmap = cmapa[cp]; 7460 const PetscInt *ii = mm->i; 7461 PetscInt *coi = coo_i + ncoo_o; 7462 PetscInt *coj = coo_j + ncoo_o; 7463 const PetscInt mr = mp[cp]->rmap->n; 7464 const PetscInt rs = C->rmap->rstart; 7465 const PetscInt re = C->rmap->rend; 7466 const PetscInt cs = C->cmap->rstart; 7467 for (i = 0; i < mr; i++) { 7468 const PetscInt *jj = mm->j + ii[i]; 7469 const PetscInt gr = rmap[i]; 7470 const PetscInt nz = ii[i + 1] - ii[i]; 7471 if (gr < rs || gr >= re) { /* this is an offproc row */ 7472 for (j = ii[i]; j < ii[i + 1]; j++) { 7473 *coi++ = gr; 7474 *idxoff++ = j; 7475 } 7476 if (!cmapt[cp]) { /* already global */ 7477 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7478 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7479 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7480 } else { /* offdiag */ 7481 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7482 } 7483 ncoo_o += nz; 7484 } else { /* this is a local row */ 7485 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7486 } 7487 } 7488 } 7489 mmdata->off[cp + 1] = idxoff; 7490 mmdata->own[cp + 1] = idxown; 7491 } 7492 7493 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7494 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7495 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7496 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7497 ncoo = ncoo_d + ncoo_oown + ncoo2; 7498 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7499 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7500 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7501 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7502 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7503 PetscCall(PetscFree2(coo_i, coo_j)); 7504 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7505 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7506 coo_i = coo_i2; 7507 coo_j = coo_j2; 7508 } else { /* no offproc values insertion */ 7509 ncoo = ncoo_d; 7510 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7511 7512 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7513 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7514 PetscCall(PetscSFSetUp(mmdata->sf)); 7515 } 7516 mmdata->hasoffproc = hasoffproc; 7517 7518 /* gather (i,j) of nonzeros inserted locally */ 7519 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7520 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7521 PetscInt *coi = coo_i + ncoo_d; 7522 PetscInt *coj = coo_j + ncoo_d; 7523 const PetscInt *jj = mm->j; 7524 const PetscInt *ii = mm->i; 7525 const PetscInt *cmap = 
cmapa[cp]; 7526 const PetscInt *rmap = rmapa[cp]; 7527 const PetscInt mr = mp[cp]->rmap->n; 7528 const PetscInt rs = C->rmap->rstart; 7529 const PetscInt re = C->rmap->rend; 7530 const PetscInt cs = C->cmap->rstart; 7531 7532 if (mptmp[cp]) continue; 7533 if (rmapt[cp] == 1) { /* consecutive rows */ 7534 /* fill coo_i */ 7535 for (i = 0; i < mr; i++) { 7536 const PetscInt gr = i + rs; 7537 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7538 } 7539 /* fill coo_j */ 7540 if (!cmapt[cp]) { /* type-0, already global */ 7541 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7542 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7543 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7544 } else { /* type-2, local to global for sparse columns */ 7545 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7546 } 7547 ncoo_d += mm->nz; 7548 } else if (rmapt[cp] == 2) { /* sparse rows */ 7549 for (i = 0; i < mr; i++) { 7550 const PetscInt *jj = mm->j + ii[i]; 7551 const PetscInt gr = rmap[i]; 7552 const PetscInt nz = ii[i + 1] - ii[i]; 7553 if (gr >= rs && gr < re) { /* local rows */ 7554 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7555 if (!cmapt[cp]) { /* type-0, already global */ 7556 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7557 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7558 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7559 } else { /* type-2, local to global for sparse columns */ 7560 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7561 } 7562 ncoo_d += nz; 7563 } 7564 } 7565 } 7566 } 7567 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7568 PetscCall(ISDestroy(&glob)); 7569 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7570 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7571 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7572 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatProductSetFromOptions_MPIAIJBACKEND - decide whether the "backend" implementation of the
  matrix product is used for C = A*B, A^T*B or P^T*A*P, or whether to fall back to the generic
  MPIAIJ machinery.

  The backend symbolic routine is installed only for product types MATPRODUCT_AB,
  MATPRODUCT_AtB and MATPRODUCT_PtAP. In device builds it is additionally required that the
  two operands have the same type and that neither is bound to the CPU; the user can also force
  the CPU fallback through the options below.

  Input Parameter:
. mat - the product matrix (must carry valid MatProduct data, see MatCheckProduct())

  Options Database Keys (device builds only; the spelling depends on whether the product was
  requested through the old API, e.g. MatMatMult(), or through the MatProduct API):
+ -matmatmult_backend_cpu            - use CPU code for MATPRODUCT_AB
. -mattransposematmult_backend_cpu   - use CPU code for MATPRODUCT_AtB
. -matptap_backend_cpu               - use CPU code for MATPRODUCT_PtAP
- -mat_product_algorithm_backend_cpu - same, for products driven through the MatProduct API
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* device build: backend only if operand types match (checked below) */
  PetscBool usecpu = PETSC_FALSE; /* set from the options database; forces the CPU fallback */
#else
  PetscBool match = PETSC_TRUE; /* host build: the backend is always applicable */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* the backend requires both operands of the same type and not bound to the CPU */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) { /* product requested via MatMatMult(): legacy option name */
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else { /* product requested via the MatProduct API */
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* an explicit CPU request overrides the type match */
  }
#endif
  if (match) { /* install the backend symbolic phase for the supported product types */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n  - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j; /* cnt stays -1 for an empty row, so *n ends up 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    /* the strict '<' test relies on idx[] being ascending (MatGetRow() order for AIJ),
       so equal block indices are consecutive and collapse into a single entry */
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx,
NULL));
  *n = cnt + 1; /* number of distinct block columns (0 for an empty row) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed  - the block indices (must be large enough to contain the indices)

  w0, w1, w2 are caller-provided work arrays; the result returned through 'collapsed'
  aliases one of them (or the array produced by PetscMergeIntArray()) — NOTE(review):
  the caller appears responsible for the lifetime of these buffers, confirm against callers.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* seed with the first row of the block, then merge in the remaining bs-1 rows */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap the 'previous' and 'merged' buffers so the next iteration merges into the other one */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFilter_AIJ - build a new graph matrix keeping only entries of Gmat with |value| > vfilter

  This will eventually be folded into MatCreateGraph_AIJ() for optimal performance

  Input Parameters:
+ Gmat    - the (Seq/MPI)AIJ graph matrix to filter
- vfilter - threshold; entries whose absolute value does not exceed it are dropped

  Output Parameter:
. filteredG - the newly created filtered matrix (same type and layout as Gmat)
*/
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  /* a = diagonal block, b = off-diagonal block (NULL in the sequential case) */
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a      = d->A;
    b      = d->B;
    garray = d->garray; /* local-to-global column map of the off-diagonal block */
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols; /* maxcols sizes the per-row scratch buffers below */
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  /* only one of the two preallocations takes effect, depending on tGmat's actual type */
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); /* all insertions below are to locally owned rows */
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0; /* nnz0 = entries scanned, nnz1 = entries kept (for the stats message) */
  /* first pass over the diagonal block a, second (if present) over the off-diagonal block b */
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) { /* keep strictly-above-threshold entries only */
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag: local column -> global, NOTE(review): assumes row/column ownership coincide (square graph) — confirm
          if (c != a) cid = garray[idx[jj]]; /* off-diagonal block: map through garray instead */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

  Input Parameter:
. Amat - matrix
- symmetrize - make the result symmetric
+ scale - scale with diagonal

  Output Parameter:
.
a_Gmat - output scalar graph >= 0 7805 7806 */ 7807 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7808 { 7809 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7810 MPI_Comm comm; 7811 Mat Gmat; 7812 PetscBool ismpiaij, isseqaij; 7813 Mat a, b, c; 7814 MatType jtype; 7815 7816 PetscFunctionBegin; 7817 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7818 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7819 PetscCall(MatGetSize(Amat, &MM, &NN)); 7820 PetscCall(MatGetBlockSize(Amat, &bs)); 7821 nloc = (Iend - Istart) / bs; 7822 7823 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7824 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7825 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7826 7827 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7828 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7829 implementation */ 7830 if (bs > 1) { 7831 PetscCall(MatGetType(Amat, &jtype)); 7832 PetscCall(MatCreate(comm, &Gmat)); 7833 PetscCall(MatSetType(Gmat, jtype)); 7834 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7835 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7836 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7837 PetscInt *d_nnz, *o_nnz; 7838 MatScalar *aa, val, *AA; 7839 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7840 if (isseqaij) { 7841 a = Amat; 7842 b = NULL; 7843 } else { 7844 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7845 a = d->A; 7846 b = d->B; 7847 } 7848 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7849 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7850 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7851 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7852 const PetscInt *cols; 7853 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7854 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7855 nnz[brow / bs] = jj / bs; 7856 if (jj % bs) ok = 0; 7857 if (cols) j0 = cols[0]; 7858 else j0 = -1; 7859 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7860 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7861 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7862 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7863 if (jj % bs) ok = 0; 7864 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7865 if (nnz[brow / bs] != jj / bs) ok = 0; 7866 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7867 } 7868 if (!ok) { 7869 PetscCall(PetscFree2(d_nnz, o_nnz)); 7870 goto old_bs; 7871 } 7872 } 7873 } 7874 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7875 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7876 PetscCall(PetscFree2(d_nnz, o_nnz)); 7877 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7878 // diag 7879 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7880 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7881 ai = aseq->i; 7882 n = ai[brow + 1] - ai[brow]; 7883 aj = aseq->j + ai[brow]; 7884 for (int k = 0; k < n; k += bs) { // block columns 7885 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7886 val = 0; 7887 for (int ii = 0; ii < bs; ii++) { // rows in block 7888 aa = aseq->a + ai[brow + ii] + k; 7889 for (int jj = 0; jj < bs; jj++) { // columns in block 7890 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7891 } 7892 } 7893 AA[k / bs] = val; 7894 } 7895 grow = Istart / bs + brow / bs; 7896 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7897 } 7898 // off-diag 7899 if (ismpiaij) { 7900 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7901 const PetscScalar *vals; 7902 const PetscInt *cols, *garray = 
aij->garray; 7903 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7904 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7905 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7906 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7907 AA[k / bs] = 0; 7908 AJ[cidx] = garray[cols[k]] / bs; 7909 } 7910 nc = ncols / bs; 7911 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7912 for (int ii = 0; ii < bs; ii++) { // rows in block 7913 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7914 for (int k = 0; k < ncols; k += bs) { 7915 for (int jj = 0; jj < bs; jj++) { // cols in block 7916 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7917 } 7918 } 7919 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7920 } 7921 grow = Istart / bs + brow / bs; 7922 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7923 } 7924 } 7925 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7926 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7927 PetscCall(PetscFree2(AA, AJ)); 7928 } else { 7929 const PetscScalar *vals; 7930 const PetscInt *idx; 7931 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7932 old_bs: 7933 /* 7934 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7935 */ 7936 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7937 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7938 if (isseqaij) { 7939 PetscInt max_d_nnz; 7940 /* 7941 Determine exact preallocation count for (sequential) scalar matrix 7942 */ 7943 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7944 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7945 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7946 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7947 PetscCall(PetscFree3(w0, w1, w2)); 7948 } else if (ismpiaij) { 7949 Mat Daij, Oaij; 7950 const PetscInt *garray; 7951 PetscInt max_d_nnz; 7952 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7953 /* 7954 Determine exact preallocation count for diagonal block portion of scalar matrix 7955 */ 7956 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7957 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7958 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7959 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7960 PetscCall(PetscFree3(w0, w1, w2)); 7961 /* 7962 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7963 */ 7964 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7965 o_nnz[jj] = 0; 7966 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7967 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7968 o_nnz[jj] += ncols; 7969 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7970 } 7971 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7972 } 7973 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7974 /* get scalar copy (norms) of matrix */ 7975 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7976 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7977 PetscCall(PetscFree2(d_nnz, o_nnz)); 7978 for (Ii = Istart; Ii < Iend; Ii++) { 7979 
PetscInt dest_row = Ii / bs; 7980 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7981 for (jj = 0; jj < ncols; jj++) { 7982 PetscInt dest_col = idx[jj] / bs; 7983 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7984 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7985 } 7986 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7987 } 7988 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7989 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7990 } 7991 } else { 7992 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7993 else { 7994 Gmat = Amat; 7995 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7996 } 7997 if (isseqaij) { 7998 a = Gmat; 7999 b = NULL; 8000 } else { 8001 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8002 a = d->A; 8003 b = d->B; 8004 } 8005 if (filter >= 0 || scale) { 8006 /* take absolute value of each entry */ 8007 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8008 MatInfo info; 8009 PetscScalar *avals; 8010 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8011 PetscCall(MatSeqAIJGetArray(c, &avals)); 8012 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8013 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8014 } 8015 } 8016 } 8017 if (symmetrize) { 8018 PetscBool isset, issym; 8019 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8020 if (!isset || !issym) { 8021 Mat matTrans; 8022 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8023 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8024 PetscCall(MatDestroy(&matTrans)); 8025 } 8026 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8027 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8028 if (scale) { 8029 /* scale c for all diagonal values = 1 or -1 */ 8030 Vec diag; 8031 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8032 PetscCall(MatGetDiagonal(Gmat, diag)); 8033 PetscCall(VecReciprocal(diag)); 8034 PetscCall(VecSqrtAbs(diag)); 8035 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8036 PetscCall(VecDestroy(&diag)); 8037 } 8038 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8039 8040 if (filter >= 0) { 8041 Mat Fmat = NULL; /* some silly compiler needs this */ 8042 8043 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8044 PetscCall(MatDestroy(&Gmat)); 8045 Gmat = Fmat; 8046 } 8047 *a_Gmat = Gmat; 8048 PetscFunctionReturn(PETSC_SUCCESS); 8049 } 8050 8051 /* 8052 Special version for direct calls from Fortran 8053 */ 8054 #include <petsc/private/fortranimpl.h> 8055 8056 /* Change these macros so can be used in void function */ 8057 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8058 #undef PetscCall 8059 #define PetscCall(...) \ 8060 do { \ 8061 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8062 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8063 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8064 return; \ 8065 } \ 8066 } while (0) 8067 8068 #undef SETERRQ 8069 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran linker naming convention of the target platform
   (all-caps, no underscore, or the default trailing-underscore form). */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran direct-call version of MatSetValues() for MATMPIAIJ.

  All arguments arrive as pointers (Fortran pass-by-reference); errors are reported
  through *_ierr via the PetscCall/SETERRQ macros redefined immediately above, which
  `return;` out of this void function instead of returning an error code.

  The insertion logic is hand-inlined for speed: entries whose global column falls in
  [cstart, cend) go into the local diagonal block aij->A via MatSetValues_SeqAIJ_A_Private(),
  other owned-row entries go into the off-diagonal block aij->B via
  MatSetValues_SeqAIJ_B_Private(), and rows not owned by this process are stashed for
  communication at assembly time. The many locals below (rp1, ap2, low1, lastcol2, ...)
  are required by those file-local macros and must keep these exact names.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; afterwards ADD_VALUES and INSERT_VALUES may not be mixed */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    /* zero entries are only dropped for ADD_VALUES; diagonal entries are always kept (see loop below) */
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed/updated by MatSetValues_SeqAIJ_{A,B}_Private() (binary-search
       bounds, current row pointers, last-inserted column, etc.) */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index means "skip this row" */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned locally: set up per-row search state for both the diagonal (1) and
           off-diagonal (2) blocks before scanning the columns */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is laid out row-major or column-major depending on MatSetOption(...,MAT_ROW_ORIENTED,...) */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* drop explicit zeros when adding, but never drop a diagonal entry */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lies in this process's diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column index means "skip this entry" */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal block: translate the global column to B's compressed local numbering */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--; /* colmap stores global+1 so that 0 can mean "absent" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not present in the assembled pattern and new nonzeros are allowed:
                   fall back to the pre-assembly (global column) representation */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B is indexed by global column */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row belongs to another process: stash the values for MatAssemblyBegin/End to communicate */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ