1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 15 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 16 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 17 PetscCall(MatDestroy(&B)); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 22 { 23 Mat B; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 27 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 28 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 29 PetscFunctionReturn(0); 30 } 31 32 /*MC 33 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 34 35 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 36 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 37 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 38 for communicators controlling multiple processes. It is recommended that you call both of 39 the above preallocation routines for simplicity. 40 41 Options Database Keys: 42 . 
-mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

   Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Propagate a host/device binding request to the sequential diagonal (a->A) and
   off-diagonal (a->B) blocks of the parallel matrix */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(0);
}

/* Forward the block sizes to the diagonal block; the off-diagonal block always keeps
   a column block size of 1, since its columns are a scattered subset of the global columns */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(0);
}

/* Return in keptrows the global indices of the locally owned rows that contain at least one
   stored nonzero VALUE (stored-but-zero entries do not count); *keptrows is left NULL when
   no process has any such empty row */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First sweep: cnt counts the local rows that are entirely zero (structurally empty,
     or all stored values equal to zero, in both the diagonal and off-diagonal blocks) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero found: row is kept, do not count it */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* n0rows = global number of zero rows; if there are none, leave *keptrows NULL */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  /* Second sweep: collect the m - cnt rows that do have a nonzero value */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for
    (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}

/* Add/insert the entries of D on the diagonal of Y; fast path goes straight to the sequential
   diagonal block when Y is assembled and its row/column layouts are congruent, otherwise falls
   back to the generic implementation */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(0);
}

/* Return in zrows the global indices of locally owned rows with a zero (or missing) diagonal
   entry; the search only needs the diagonal block since the diagonal lives there */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* local row ids -> global row ids */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(0);
}

/* Compute a per-column reduction (norm, sum or mean) over all rows of the matrix into
   reductions[], an array of length N (the global number of columns) */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one accumulation slot per GLOBAL column */
  /* Get/Restore pairs with unused results: presumably these force a device-to-host sync of the
     value arrays before they are read directly below -- TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* diagonal block columns are offset by cstart; off-diagonal columns map through garray */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine partial per-column results across processes: max for infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] =
      PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean = column sum / global number of rows */
  }
  PetscFunctionReturn(0);
}

/* Return in is the global indices of locally owned rows that have an entry outside the
   block diagonal: rows flagged by the diagonal block's own off-block-diagonal search,
   merged (sorted, de-duplicated) with rows that have any entry in the off-diagonal block */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both index lists, then sort and remove duplicates */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local row ids -> global row ids */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i; /* number of local off-diagonal columns */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* values are stored shifted by +1 so that 0 (the hash map's "missing" default) means "not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array of global length; entry 0 means "not present", hence the +1 shift */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}

/* Insert/add one value at (row,col) of the diagonal block A, using the cached per-row search
   window (low1/high1/lastcol1) and reallocating the row when a new nonzero must be inserted.
   All variables referenced here (rp1, ap1, nrow1, ...) must be set up by the caller. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B, using the
   rp2/ap2/nrow2/low2/high2/lastcol2 search-window variables */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

/* Overwrite all stored values of one (globally indexed) locally owned row with the values in v.
   v is assumed to hold the row's values in global column order: off-diagonal entries left of the
   diagonal block, then the diagonal block, then off-diagonal entries to the right. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /*
     find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* global row -> local row (diag == rstart here) */
  /* l = number of off-diagonal entries whose global column is left of the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}

/* Insert or add an m-by-n logically dense block of values v at global rows im[] and columns in[].
   Locally owned rows are routed to the diagonal/off-diagonal block via the macros above;
   off-process rows are buffered in the stash for communication at assembly time. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A                 = aij->A;
  Mat_SeqAIJ *a                 = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt
              nonew;
  MatScalar *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the per-row search windows for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *)B->data;
              bimax
                    = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write positions into aj / bj */
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column ids */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* still global ids; compacted later during assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd     = full_offd_i[j];
    rowstart_diag     = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; /* diagonal block uses local column ids */
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve an m-by-n block of values at global rows idxm[] / columns idxn[] into v.
   Only locally owned rows are supported; off-diagonal columns are looked up through the
   colmap and return 0.0 when not stored. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart =
             mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate the global column through the colmap */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored locally -> the entry is an (unstored) zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Start communicating stashed off-process entries; a no-op when stashing is disabled */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash into local blocks, handle disassembly consistency across
   ranks, assemble both sequential blocks, and set up the off-process scatter */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,
                                     MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed off-process entries and insert them locally, one message at a time */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row data and the cached diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero all stored values of both the diagonal and off-diagonal blocks (structure unchanged) */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Zero the given global rows, optionally placing diag on the diagonal and fixing up the
   right-hand side b from x; rows may be owned by any process */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x,
                                   Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed: b_i = diag * x_i for each zeroed row i */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember nonzero states so we can detect whether zeroing changed the pattern */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry of each zeroed row lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA; /* restore the original insertion policies */
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank =
p;
    rrows[r].index = rows[r] - owners[p]; /* local index on the owning rank */
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1; /* 1 marks a zeroed row/column */
  PetscCall(VecRestoreArray(xmask, &bb));
  /* scatter the mask so every rank learns which of its ghost columns were zeroed */
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slot -> actual local row */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column *aj was zeroed: move its contribution to the rhs, then zero the entry */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* yy = A*xx: the ghost-value scatter is overlapped with the diagonal-block multiply */
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* start communicating ghost values; multiply by the diagonal block while communication is in flight */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx,
a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 969 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 976 977 PetscFunctionBegin; 978 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 979 PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 985 VecScatter Mvctx = a->Mvctx; 986 987 PetscFunctionBegin; 988 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 989 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 990 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 992 PetscFunctionReturn(0); 993 } 994 995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 998 999 PetscFunctionBegin; 1000 /* do nondiagonal part */ 1001 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1002 /* do local part */ 1003 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1004 /* add partial results together */ 1005 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1006 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1011 { 1012 MPI_Comm comm; 1013 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij; 1014 Mat Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs; 1015 IS Me, Notme; 1016 PetscInt M, N, first, last, *notme, i; 1017 PetscBool lf; 1018 PetscMPIInt size; 1019 1020 PetscFunctionBegin; 1021 /* Easy test: symmetric diagonal block */ 1022 Bij = (Mat_MPIAIJ *)Bmat->data; 1023 Bdia = Bij->A; 1024 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1025 
PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1026 if (!*f) PetscFunctionReturn(0); 1027 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1028 PetscCallMPI(MPI_Comm_size(comm, &size)); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1032 PetscCall(MatGetSize(Amat, &M, &N)); 1033 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1034 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1035 for (i = 0; i < first; i++) notme[i] = i; 1036 for (i = last; i < M; i++) notme[i - last + first] = i; 1037 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1038 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1039 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1040 Aoff = Aoffs[0]; 1041 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1042 Boff = Boffs[0]; 1043 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1044 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1045 PetscCall(MatDestroyMatrices(1, &Boffs)); 1046 PetscCall(ISDestroy(&Me)); 1047 PetscCall(ISDestroy(&Notme)); 1048 PetscCall(PetscFree(notme)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1053 { 1054 PetscFunctionBegin; 1055 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1062 1063 PetscFunctionBegin; 1064 /* do nondiagonal part */ 1065 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1066 /* do local part */ 1067 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1068 /* add partial results together */ 1069 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1070 
PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 /* 1075 This only works correctly for square matrices where the subblock A->A is the 1076 diagonal block 1077 */ 1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1079 { 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1081 1082 PetscFunctionBegin; 1083 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1084 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1085 PetscCall(MatGetDiagonal(a->A, v)); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1092 1093 PetscFunctionBegin; 1094 PetscCall(MatScale(a->A, aa)); 1095 PetscCall(MatScale(a->B, aa)); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1100 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1101 { 1102 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1103 1104 PetscFunctionBegin; 1105 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1106 PetscCall(PetscFree(aij->Aperm1)); 1107 PetscCall(PetscFree(aij->Bperm1)); 1108 PetscCall(PetscFree(aij->Ajmap1)); 1109 PetscCall(PetscFree(aij->Bjmap1)); 1110 1111 PetscCall(PetscFree(aij->Aimap2)); 1112 PetscCall(PetscFree(aij->Bimap2)); 1113 PetscCall(PetscFree(aij->Aperm2)); 1114 PetscCall(PetscFree(aij->Bperm2)); 1115 PetscCall(PetscFree(aij->Ajmap2)); 1116 PetscCall(PetscFree(aij->Bjmap2)); 1117 1118 PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf)); 1119 PetscCall(PetscFree(aij->Cperm1)); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1124 { 1125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1126 1127 PetscFunctionBegin; 
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* detach all composed methods registered in MatCreate_MPIAIJ() and the convert routines */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is composed to NULL a second time here (also above);
     harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(0);
}

/* Binary viewer: write the classid header, per-row lengths, global column indices
   (diag and off-diag entries of each row merged so columns appear in ascending order),
   then the matching values. */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray; /* local off-diag column -> global column */
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local nonzero count; summed to the global count on rank 0 below */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* only rank 0's header[3] receives the global sum; only rank 0 writes the header */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diag entries with global column below the diagonal range come first */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* then the diagonal-block entries, shifted to global numbering */
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* then the remaining off-diag entries */
    for (; jb <
B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  /* same three-way merge as for the column indices above, so values line up with them */
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/* Common viewer body for ASCII, draw, binary and socket viewers.  Formats that can be
   handled per-rank return early; otherwise the matrix is gathered onto rank 0 and viewed
   there as a SeqAIJ. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per rank */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch looks unreachable — iascii is already consumed by the first
       branch of this if/else-if chain; confirm before relying on it */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols, everyone else requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView dispatcher: all four supported viewer types share one body */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(0);
}

/* Parallel (block Jacobi style) SOR: only the "local" sweep variants are supported; each
   outer iteration scatters ghost values of xx, forms bb1 = bb - B*x, and runs the requested
   sweep of the diagonal block.  Eisenstat's trick is handled separately. */
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* a work vector is needed unless this is a single zero-initial-guess sweep */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal; reused on subsequent calls */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* B = P_row * A * P_col: permute rows and columns of a parallel AIJ matrix.  Uses PetscSF
   to invert the (globally distributed) permutations and to learn the destination of every
   local row, column, and ghost column before inserting into the new matrix. */
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros of every permuted row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the ranks that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));
1586 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1587 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1588 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1589 for (i = 0; i < m; i++) { 1590 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1591 PetscInt j0, rowlen; 1592 rowlen = ai[i + 1] - ai[i]; 1593 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1594 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1595 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1596 } 1597 rowlen = bi[i + 1] - bi[i]; 1598 for (j0 = j = 0; j < rowlen; j0 = j) { 1599 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1600 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1601 } 1602 } 1603 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1604 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1605 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1606 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1607 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1608 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1609 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1610 PetscCall(PetscFree3(work, rdest, cdest)); 1611 PetscCall(PetscFree(gcdest)); 1612 if (parcolp) PetscCall(ISDestroy(&colp)); 1613 *B = Aperm; 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1618 { 1619 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1620 1621 PetscFunctionBegin; 1622 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1623 if (ghosts) *ghosts = aij->garray; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo 
*info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* Stash the diagonal block's statistics ... */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* ... and accumulate the off-diagonal block's on top */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Set an option on the parallel matrix; most options are forwarded to both the diagonal (a->A)
   and off-diagonal (a->B) sequential blocks, a few are stored on the MPI wrapper itself. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* These options are forwarded to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* also remembered on the wrapper for MatSetValues */

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/* Return one locally owned row of the parallel matrix: the rows of the diagonal (A) and
   off-diagonal (B) blocks are merged into a single column-sorted list using the workspace
   mat->rowvalues / mat->rowindices (sized to the longest local row on first use). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* Only request from the blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries whose global column precedes the diagonal block */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow_MPIAIJ(): just clears the "row access in progress" flag. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute a matrix norm (Frobenius, 1-norm, or infinity-norm) of the parallel matrix by combining
   contributions of the diagonal (A) and off-diagonal (B) blocks and reducing over the communicator.
   The 2-norm is not supported. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: delegate directly to the sequential norm */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then global sum and sqrt */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates |a_ij| per GLOBAL column (length cmap->N) — O(N) storage per process */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}

/* Transpose the parallel matrix. For MAT_INITIAL_MATRIX (or in-place), the transposed nonzero
   pattern is counted first — local off-diagonal column counts are mapped to owning processes with
   a PetscSF reduce — so the result can be preallocated exactly. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /*
     Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed B columns to global indices before insertion */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of B as COLUMN "row" of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's innards with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

/* Diagonal scaling mat = diag(ll) * mat * diag(rr); either vector may be NULL.
   The scatter of rr into the ghost vector is overlapped with scaling the local blocks. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}

/* Mark the matrix as not factored; only the diagonal block carries factorization state here. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Compare A and B blockwise; the local results are combined with a logical-AND allreduce so
   all processes return the same answer. */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy A into B. Fast blockwise copy is only safe when the nonzero patterns match and both
   matrices use the same copy implementation; otherwise fall back to MatCopy_Basic(). */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default (heuristic) nonzero counts. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
  Computes the number of nonzeros per row needed for preallocation when X and Y
  have different nonzero structure.
2088 */ 2089 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2090 { 2091 PetscInt i, j, k, nzx, nzy; 2092 2093 PetscFunctionBegin; 2094 /* Set the number of nonzeros in the new matrix */ 2095 for (i = 0; i < m; i++) { 2096 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2097 nzx = xi[i + 1] - xi[i]; 2098 nzy = yi[i + 1] - yi[i]; 2099 nnz[i] = 0; 2100 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2101 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2102 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2103 nnz[i]++; 2104 } 2105 for (; k < nzy; k++) nnz[i]++; 2106 } 2107 PetscFunctionReturn(0); 2108 } 2109 2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2112 { 2113 PetscInt m = Y->rmap->N; 2114 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2115 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2116 2117 PetscFunctionBegin; 2118 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2123 { 2124 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2125 2126 PetscFunctionBegin; 2127 if (str == SAME_NONZERO_PATTERN) { 2128 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2129 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2130 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2131 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2132 } else { 2133 Mat B; 2134 PetscInt *nnz_d, *nnz_o; 2135 2136 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2137 PetscCall(PetscMalloc1(yy->B->rmap->N, 
&nnz_o)); 2138 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2139 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2140 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2141 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2142 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2143 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2144 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2145 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2146 PetscCall(MatHeaderMerge(Y, &B)); 2147 PetscCall(PetscFree(nnz_d)); 2148 PetscCall(PetscFree(nnz_o)); 2149 } 2150 PetscFunctionReturn(0); 2151 } 2152 2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2154 2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2156 { 2157 PetscFunctionBegin; 2158 if (PetscDefined(USE_COMPLEX)) { 2159 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2160 2161 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2162 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2170 2171 PetscFunctionBegin; 2172 PetscCall(MatRealPart(a->A)); 2173 PetscCall(MatRealPart(a->B)); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2178 { 2179 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2180 2181 PetscFunctionBegin; 2182 PetscCall(MatImaginaryPart(a->A)); 2183 PetscCall(MatImaginaryPart(a->B)); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2188 { 2189 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2190 PetscInt i, *idxb = NULL, m = A->rmap->n; 2191 PetscScalar *va, *vv; 2192 Vec vB, vA; 2193 const PetscScalar *vb; 2194 2195 PetscFunctionBegin; 2196 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2197 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2198 2199 PetscCall(VecGetArrayWrite(vA, &va)); 
2200 if (idx) { 2201 for (i = 0; i < m; i++) { 2202 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2203 } 2204 } 2205 2206 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2207 PetscCall(PetscMalloc1(m, &idxb)); 2208 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2209 2210 PetscCall(VecGetArrayWrite(v, &vv)); 2211 PetscCall(VecGetArrayRead(vB, &vb)); 2212 for (i = 0; i < m; i++) { 2213 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2214 vv[i] = vb[i]; 2215 if (idx) idx[i] = a->garray[idxb[i]]; 2216 } else { 2217 vv[i] = va[i]; 2218 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2219 } 2220 } 2221 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2222 PetscCall(VecRestoreArrayWrite(vA, &va)); 2223 PetscCall(VecRestoreArrayRead(vB, &vb)); 2224 PetscCall(PetscFree(idxb)); 2225 PetscCall(VecDestroy(&vA)); 2226 PetscCall(VecDestroy(&vB)); 2227 PetscFunctionReturn(0); 2228 } 2229 2230 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2231 { 2232 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2233 PetscInt m = A->rmap->n, n = A->cmap->n; 2234 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2235 PetscInt *cmap = mat->garray; 2236 PetscInt *diagIdx, *offdiagIdx; 2237 Vec diagV, offdiagV; 2238 PetscScalar *a, *diagA, *offdiagA; 2239 const PetscScalar *ba, *bav; 2240 PetscInt r, j, col, ncols, *bi, *bj; 2241 Mat B = mat->B; 2242 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2243 2244 PetscFunctionBegin; 2245 /* When a process holds entire A and other processes have no entry */ 2246 if (A->cmap->N == n) { 2247 PetscCall(VecGetArrayWrite(v, &diagA)); 2248 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2250 PetscCall(VecDestroy(&diagV)); 2251 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2252 PetscFunctionReturn(0); 2253 } else if (n == 0) { 2254 if (m) { 2255 PetscCall(VecGetArrayWrite(v, &a)); 2256 
      /* no local columns at all: every row minimum is the implicit zero */
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now take explicit off-diagonal entries into account */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, return the minimum entry (by real part; implicit off-diagonal zeros count)
   and optionally its global column. Structure parallels MatGetRowMinAbs_MPIAIJ above. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no local columns: rows are "empty" from this process's perspective */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
} else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2441 a[r] = diagA[r]; 2442 if (idx) { 2443 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2444 idx[r] = cstart + diagIdx[r]; 2445 } else idx[r] = offdiagIdx[r]; 2446 } 2447 } else { 2448 a[r] = offdiagA[r]; 2449 if (idx) idx[r] = offdiagIdx[r]; 2450 } 2451 } 2452 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2453 PetscCall(VecRestoreArrayWrite(v, &a)); 2454 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2455 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2456 PetscCall(VecDestroy(&diagV)); 2457 PetscCall(VecDestroy(&offdiagV)); 2458 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2463 { 2464 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2465 PetscInt m = A->rmap->n, n = A->cmap->n; 2466 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2467 PetscInt *cmap = mat->garray; 2468 PetscInt *diagIdx, *offdiagIdx; 2469 Vec diagV, offdiagV; 2470 PetscScalar *a, *diagA, *offdiagA; 2471 const PetscScalar *ba, *bav; 2472 PetscInt r, j, col, ncols, *bi, *bj; 2473 Mat B = mat->B; 2474 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2475 2476 PetscFunctionBegin; 2477 /* When a process holds entire A and other processes have no entry */ 2478 if (A->cmap->N == n) { 2479 PetscCall(VecGetArrayWrite(v, &diagA)); 2480 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2481 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2482 PetscCall(VecDestroy(&diagV)); 2483 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2484 PetscFunctionReturn(0); 2485 } else if (n == 0) { 2486 if (m) { 2487 PetscCall(VecGetArrayWrite(v, &a)); 2488 for (r = 0; r < m; r++) { 2489 a[r] = PETSC_MIN_REAL; 2490 if (idx) idx[r] = -1; 2491 } 2492 PetscCall(VecRestoreArrayWrite(v, &a)); 2493 } 2494 PetscFunctionReturn(0); 2495 } 2496 2497 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2498 
  /* (continuation of MatGetRowMax_MPIAIJ: general case, mirroring MatGetRowMin_MPIAIJ above) */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); /* per-row max (and its local column) of the diagonal block */

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) { /* skip over the diagonal-block column range [cstart,cend) */
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* B's row is empty: the first off-diagonal global column is a hole */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* hop over the diagonal-block columns */
        }
      }
    }

    /* Scan the stored entries of this row of B; ba/bj advance monotonically across all rows */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { /* keep the larger value (row maximum) */
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal results; on a value tie the smaller global column wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx[] is local to mat->A; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) { /* tie: pick the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* Return in *newmat a sequential matrix holding the nonzero structure of the whole parallel matrix */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); /* structure only, no values */
  *newmat = *dummy;
  PetscCall(PetscFree(dummy)); /* free only the Mat* array wrapper, not the matrix it holds */
  PetscFunctionReturn(0);
}

/* Invert the point-block diagonal of the local diagonal block; *values points at the inverses */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype; /* propagate singular-block errors to the parallel matrix */
  PetscFunctionReturn(0);
}

/* Fill the matrix with random numbers; requires the matrix to be assembled or at least preallocated */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else { /* not yet assembled: keep diagonal-block columns out of the off-diagonal block B */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2617 { 2618 PetscFunctionBegin; 2619 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2620 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 /*@ 2625 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2626 2627 Not collective 2628 2629 Input Parameter: 2630 . A - the matrix 2631 2632 Output Parameter: 2633 . nz - the number of nonzeros 2634 2635 Level: advanced 2636 2637 .seealso: `MATMPIAIJ`, `Mat` 2638 @*/ 2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2640 { 2641 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2642 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2643 2644 PetscFunctionBegin; 2645 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2646 PetscFunctionReturn(0); 2647 } 2648 2649 /*@ 2650 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2651 2652 Collective 2653 2654 Input Parameters: 2655 + A - the matrix 2656 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2657 2658 Level: advanced 2659 2660 @*/ 2661 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2662 { 2663 PetscFunctionBegin; 2664 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2665 PetscFunctionReturn(0); 2666 } 2667 2668 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2669 { 2670 PetscBool sc = PETSC_FALSE, flg; 2671 2672 PetscFunctionBegin; 2673 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2674 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2675 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", 
sc, &sc, &flg)); 2676 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2677 PetscOptionsHeadEnd(); 2678 PetscFunctionReturn(0); 2679 } 2680 2681 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2682 { 2683 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2684 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2685 2686 PetscFunctionBegin; 2687 if (!Y->preallocated) { 2688 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2689 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2690 PetscInt nonew = aij->nonew; 2691 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2692 aij->nonew = nonew; 2693 } 2694 PetscCall(MatShift_Basic(Y, a)); 2695 PetscFunctionReturn(0); 2696 } 2697 2698 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2699 { 2700 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2704 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2705 if (d) { 2706 PetscInt rstart; 2707 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2708 *d += rstart; 2709 } 2710 PetscFunctionReturn(0); 2711 } 2712 2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2714 { 2715 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2716 2717 PetscFunctionBegin; 2718 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2723 { 2724 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2725 2726 PetscFunctionBegin; 2727 PetscCall(MatEliminateZeros(a->A)); 2728 PetscCall(MatEliminateZeros(a->B)); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 /* -------------------------------------------------------------------*/ 2733 static struct _MatOps MatOps_Values = 
  /* Function-pointer dispatch table for MATMPIAIJ; the /*NN*/ comments are slot indices in _MatOps */
  {MatSetValues_MPIAIJ,
   MatGetRow_MPIAIJ,
   MatRestoreRow_MPIAIJ,
   MatMult_MPIAIJ,
   /* 4*/ MatMultAdd_MPIAIJ,
   MatMultTranspose_MPIAIJ,
   MatMultTransposeAdd_MPIAIJ,
   NULL,
   NULL,
   NULL,
   /*10*/ NULL,
   NULL,
   NULL,
   MatSOR_MPIAIJ,
   MatTranspose_MPIAIJ,
   /*15*/ MatGetInfo_MPIAIJ,
   MatEqual_MPIAIJ,
   MatGetDiagonal_MPIAIJ,
   MatDiagonalScale_MPIAIJ,
   MatNorm_MPIAIJ,
   /*20*/ MatAssemblyBegin_MPIAIJ,
   MatAssemblyEnd_MPIAIJ,
   MatSetOption_MPIAIJ,
   MatZeroEntries_MPIAIJ,
   /*24*/ MatZeroRows_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*29*/ MatSetUp_MPIAIJ,
   NULL,
   NULL,
   MatGetDiagonalBlock_MPIAIJ,
   NULL,
   /*34*/ MatDuplicate_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*39*/ MatAXPY_MPIAIJ,
   MatCreateSubMatrices_MPIAIJ,
   MatIncreaseOverlap_MPIAIJ,
   MatGetValues_MPIAIJ,
   MatCopy_MPIAIJ,
   /*44*/ MatGetRowMax_MPIAIJ,
   MatScale_MPIAIJ,
   MatShift_MPIAIJ,
   MatDiagonalSet_MPIAIJ,
   MatZeroRowsColumns_MPIAIJ,
   /*49*/ MatSetRandom_MPIAIJ,
   MatGetRowIJ_MPIAIJ,
   MatRestoreRowIJ_MPIAIJ,
   NULL,
   NULL,
   /*54*/ MatFDColoringCreate_MPIXAIJ,
   NULL,
   MatSetUnfactored_MPIAIJ,
   MatPermute_MPIAIJ,
   NULL,
   /*59*/ MatCreateSubMatrix_MPIAIJ,
   MatDestroy_MPIAIJ,
   MatView_MPIAIJ,
   NULL,
   NULL,
   /*64*/ NULL,
   MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
   NULL,
   NULL,
   NULL,
   /*69*/ MatGetRowMaxAbs_MPIAIJ,
   MatGetRowMinAbs_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*75*/ MatFDColoringApply_AIJ,
   MatSetFromOptions_MPIAIJ,
   NULL,
   NULL,
   MatFindZeroDiagonals_MPIAIJ,
   /*80*/ NULL,
   NULL,
   NULL,
   /*83*/ MatLoad_MPIAIJ,
   MatIsSymmetric_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*89*/ NULL,
   NULL,
   MatMatMultNumeric_MPIAIJ_MPIAIJ,
   NULL,
   NULL,
   /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
   NULL,
   NULL,
   NULL,
   MatBindToCPU_MPIAIJ,
   /*99*/ MatProductSetFromOptions_MPIAIJ,
   NULL,
   NULL,
   MatConjugate_MPIAIJ,
   NULL,
   /*104*/ MatSetValuesRow_MPIAIJ,
   MatRealPart_MPIAIJ,
   MatImaginaryPart_MPIAIJ,
   NULL,
   NULL,
   /*109*/ NULL,
   NULL,
   MatGetRowMin_MPIAIJ,
   NULL,
   MatMissingDiagonal_MPIAIJ,
   /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
   NULL,
   MatGetGhosts_MPIAIJ,
   NULL,
   NULL,
   /*119*/ MatMultDiagonalBlock_MPIAIJ,
   NULL,
   NULL,
   NULL,
   MatGetMultiProcBlock_MPIAIJ,
   /*124*/ MatFindNonzeroRows_MPIAIJ,
   MatGetColumnReductions_MPIAIJ,
   MatInvertBlockDiagonal_MPIAIJ,
   MatInvertVariableBlockDiagonal_MPIAIJ,
   MatCreateSubMatricesMPI_MPIAIJ,
   /*129*/ NULL,
   NULL,
   NULL,
   MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
   NULL,
   /*134*/ NULL,
   NULL,
   NULL,
   NULL,
   NULL,
   /*139*/ MatSetBlockSizes_MPIAIJ,
   NULL,
   NULL,
   MatFDColoringSetUp_MPIXAIJ,
   MatFindOffBlockDiagonalEntries_MPIAIJ,
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
   /*145*/ NULL,
   NULL,
   NULL,
   MatCreateGraph_Simple_AIJ,
   NULL,
   /*150*/ NULL,
   MatEliminateZeros_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

/* Stash a copy of the current numerical values of both local blocks for later retrieval */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the numerical values previously saved with MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocate the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) local blocks of a MATMPIAIJ */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt
d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* drop any previous column map / ghost data; it is rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* uniprocessor case has no off-diagonal block, hence zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  if (!B->preallocated) { /* the diagonal block is created only on first preallocation */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset the preallocation of both local blocks while keeping the matrix's layout; the matrix
   must then be refilled and reassembled */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* drop any previous column map / ghost data; it is rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate a MATMPIAIJ matrix; cpvalues controls whether numerical values are copied */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow() work arrays are recreated lazily */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) { /* deep-copy the global-to-local column map */
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) { /* deep-copy the compressed-to-global column map of B */
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;
  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); /* carry over composed methods (e.g. MatMPIAIJSetPreallocation_C) */
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a MATMPIAIJ matrix from a viewer; dispatches on the viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load a MATMPIAIJ matrix from a PETSc binary viewer: header, row lengths, column indices, values */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: [classid, M, N, nz] */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* nz < 0 marks the dense/special on-disk format, which this loader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix-summed into CSR row offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the sum of all local row lengths must equal the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); /* NOTE(review): mlen receives the column range *end*, not a length — the check below relies on that */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree that iscol covers exactly their owned column range */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local)); /* gathers all of iscol onto every rank — the non-scalable path */
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local columns of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameters:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     -1 marks columns NOT selected by iscol */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols; /* turn the inclusive scan into an exclusive prefix: global offset of this rank's first iscol entry */
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* global row -> local row of mat->A */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* this ghost column was selected by iscol (marker overwrote -1) */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; M takes ownership of Asub and Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* merge-walk subgarray (condensed) against garray (original) to keep only surviving columns */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level MATMPIAIJ submatrix extraction: dispatches to the specialized same-distribution
   paths when possible and falls back to the general non-scalable algorithm otherwise */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects on *newmat record which path created it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE; /* an empty local set trivially matches the row distribution */
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] =
PETSC_TRUE; 3390 } else { 3391 PetscCall(ISGetMinMax(iscol, &i, &j)); 3392 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3393 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3394 } 3395 3396 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3397 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3398 sameRowDist = tsameDist[0]; 3399 } 3400 3401 if (sameRowDist) { 3402 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3403 /* isrow and iscol have same processor distribution as mat */ 3404 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3405 PetscFunctionReturn(0); 3406 } else { /* sameRowDist */ 3407 /* isrow has same processor distribution as mat */ 3408 if (call == MAT_INITIAL_MATRIX) { 3409 PetscBool sorted; 3410 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3411 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3412 PetscCall(ISGetSize(iscol, &i)); 3413 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3414 3415 PetscCall(ISSorted(iscol_local, &sorted)); 3416 if (sorted) { 3417 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3418 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3419 PetscFunctionReturn(0); 3420 } 3421 } else { /* call == MAT_REUSE_MATRIX */ 3422 IS iscol_sub; 3423 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3424 if (iscol_sub) { 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3426 PetscFunctionReturn(0); 3427 } 3428 } 3429 } 3430 } 3431 3432 /* General case: iscol -> iscol_local which has global size of iscol */ 3433 if (call == MAT_REUSE_MATRIX) { 3434 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", 
(PetscObject *)&iscol_local)); 3435 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3436 } else { 3437 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3438 } 3439 3440 PetscCall(ISGetLocalSize(iscol, &csize)); 3441 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3442 3443 if (call == MAT_INITIAL_MATRIX) { 3444 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3445 PetscCall(ISDestroy(&iscol_local)); 3446 } 3447 PetscFunctionReturn(0); 3448 } 3449 3450 /*@C 3451 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3452 and "off-diagonal" part of the matrix in CSR format. 3453 3454 Collective 3455 3456 Input Parameters: 3457 + comm - MPI communicator 3458 . A - "diagonal" portion of matrix 3459 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3460 - garray - global index of B columns 3461 3462 Output Parameter: 3463 . mat - the matrix, with input A as its local diagonal matrix 3464 Level: advanced 3465 3466 Notes: 3467 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3468 3469 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local (diagonal-block) column counts */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* map B's compressed local column indices to global indices via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* clear B's ownership flags BEFORE destroying it so the shared arrays survive */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
/* Fast path for MatCreateSubMatrix_MPIAIJ() when isrow matches mat's row ownership layout.
   iscol_local is the (sorted, possibly duplicated) sequential gather of iscol; it may be
   NULL on MAT_REUSE_MATRIX, in which case the objects composed on *newmat ("SubIScol",
   "Subcmap", "SubMatrix") are reused. */
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the work objects saved on *newmat during the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* merge-scan: keep only the requested columns this process actually touches,
         i.e. those in its diagonal block or in garray (off-diagonal columns) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this process's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column indices into submatrix column indices */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all
processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum over local column counts yields this process's column range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/* MPIAIJ implementation of MatMPIAIJSetPreallocationCSR(): preallocates B from local CSR
   arrays (Ii, J, v), inserts the values, assembles, and records in Aij->ld the number of
   entries per row that lie below the diagonal block (used elsewhere for local ordering). */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* debug-only validation of the CSR input; relies on column indices being sorted per row */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block (d_nnz) and off-diagonal (o_nnz) parts */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* all values are local by construction, so skip the off-process communication in assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz; /* advance J to the start of the next row */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering..
i.e for the following matrix, the input data expected is
  as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$  Process0 [P0]: rows_owned=[0,1]
$    i =  {0,1,3}  [size = nrow+1  = 2+1]
$    j =  {0,0,2}  [size = 3]
$    v =  {1,2,3}  [size = 3]
$
$  Process1 [P1]: rows_owned=[2]
$    i =  {0,3}    [size = nrow+1  = 1+1]
$    j =  {0,1,2}  [size = 3]
$    v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered as "MatMPIAIJSetPreallocationCSR_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B     - the matrix
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to.  This is an mxn matrix.  In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation registered as "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4174 4175 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4176 4177 The format which is used for the sparse matrix input, is equivalent to a 4178 row-major ordering.. i.e for the following matrix, the input data expected is 4179 as shown 4180 4181 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4182 4183 $ 1 0 0 4184 $ 2 0 3 P0 4185 $ ------- 4186 $ 4 5 6 P1 4187 $ 4188 $ Process0 [P0]: rows_owned=[0,1] 4189 $ i = {0,1,3} [size = nrow+1 = 2+1] 4190 $ j = {0,0,2} [size = 3] 4191 $ v = {1,2,3} [size = 3] 4192 $ 4193 $ Process1 [P1]: rows_owned=[2] 4194 $ i = {0,3} [size = nrow+1 = 1+1] 4195 $ j = {0,1,2} [size = 3] 4196 $ v = {4,5,6} [size = 3] 4197 4198 .seealso: `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4199 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4200 @*/ 4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4202 { 4203 PetscFunctionBegin; 4204 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4205 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4206 PetscCall(MatCreate(comm, mat)); 4207 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4208 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4209 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4210 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4211 PetscFunctionReturn(0); 4212 } 4213 4214 /*@ 4215 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4216 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()` 4217 4218 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4219 4220 Collective 4221 4222 Input Parameters: 4223 + mat - the matrix 4224 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4225 . n - This value should be the same as the local size used in creating the 4226 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4227 calculated if N is given) For square matrices n is almost always m. 4228 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4229 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4230 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4231 . J - column indices 4232 - v - matrix values 4233 4234 Level: intermediate 4235 4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4237 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4238 @*/ 4239 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4240 { 4241 PetscInt nnz, i; 4242 PetscBool nooffprocentries; 4243 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4244 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4245 PetscScalar *ad, *ao; 4246 PetscInt ldi, Iii, md; 4247 const PetscInt *Adi = Ad->i; 4248 PetscInt *ld = Aij->ld; 4249 4250 PetscFunctionBegin; 4251 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4252 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4253 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4254 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4255 4256 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4257 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4258 4259 for (i = 0; i < m; i++) { 4260 nnz = Ii[i + 1] - Ii[i]; 4261 Iii = Ii[i]; 4262 ldi = ld[i]; 4263 md = Adi[i + 1] - Adi[i]; 4264 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4265 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4266 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4267 ad += md; 4268 ao += nnz - md; 4269 } 4270 nooffprocentries = mat->nooffprocentries; 4271 mat->nooffprocentries = PETSC_TRUE; 4272 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4273 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4274 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4275 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4276 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4277 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4278 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4279 mat->nooffprocentries = nooffprocentries; 4280 PetscFunctionReturn(0); 4281 } 4282 4283 /*@ 4284 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4285 4286 Collective 4287 4288 Input Parameters: 4289 + mat - the matrix 4290 - v - matrix values, stored by row 4291 4292 Level: intermediate 4293 4294 Note: 4295 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4296 4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4298 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4299 @*/ 4300 
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4301 { 4302 PetscInt nnz, i, m; 4303 PetscBool nooffprocentries; 4304 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4307 PetscScalar *ad, *ao; 4308 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4309 PetscInt ldi, Iii, md; 4310 PetscInt *ld = Aij->ld; 4311 4312 PetscFunctionBegin; 4313 m = mat->rmap->n; 4314 4315 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4316 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4317 Iii = 0; 4318 for (i = 0; i < m; i++) { 4319 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4320 ldi = ld[i]; 4321 md = Adi[i + 1] - Adi[i]; 4322 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4323 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4324 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4325 ad += md; 4326 ao += nnz - md; 4327 Iii += nnz; 4328 } 4329 nooffprocentries = mat->nooffprocentries; 4330 mat->nooffprocentries = PETSC_TRUE; 4331 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4332 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4333 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4334 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4335 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4336 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4337 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4338 mat->nooffprocentries = nooffprocentries; 4339 PetscFunctionReturn(0); 4340 } 4341 4342 /*@C 4343 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4344 (the default parallel PETSc format). For good matrix assembly performance 4345 the user should preallocate the matrix storage by setting the parameters 4346 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4347 performance can be increased by more than a factor of 50. 
4348 4349 Collective 4350 4351 Input Parameters: 4352 + comm - MPI communicator 4353 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4354 This value should be the same as the local size used in creating the 4355 y vector for the matrix-vector product y = Ax. 4356 . n - This value should be the same as the local size used in creating the 4357 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4358 calculated if N is given) For square matrices n is almost always m. 4359 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4360 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4361 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4362 (same value is used for all local rows) 4363 . d_nnz - array containing the number of nonzeros in the various rows of the 4364 DIAGONAL portion of the local submatrix (possibly different for each row) 4365 or NULL, if d_nz is used to specify the nonzero structure. 4366 The size of this array is equal to the number of local rows, i.e 'm'. 4367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4368 submatrix (same value is used for all local rows). 4369 - o_nnz - array containing the number of nonzeros in the various rows of the 4370 OFF-DIAGONAL portion of the local submatrix (possibly different for 4371 each row) or NULL, if o_nz is used to specify the nonzero 4372 structure. The size of this array is equal to the number 4373 of local rows, i.e 'm'. 4374 4375 Output Parameter: 4376 . A - the matrix 4377 4378 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4379 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
4418 4419 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4420 4421 When calling this routine with a single process communicator, a matrix of 4422 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4423 type of communicator, use the construction mechanism 4424 .vb 4425 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4426 .ve 4427 4428 $ MatCreate(...,&A); 4429 $ MatSetType(A,MATMPIAIJ); 4430 $ MatSetSizes(A, m,n,M,N); 4431 $ MatMPIAIJSetPreallocation(A,...); 4432 4433 By default, this format uses inodes (identical nodes) when possible. 4434 We search for consecutive rows with the same nonzero structure, thereby 4435 reusing matrix information to achieve increased efficiency. 4436 4437 Options Database Keys: 4438 + -mat_no_inode - Do not use inodes 4439 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4440 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4441 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4442 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4443 4444 Example usage: 4445 4446 Consider the following 8x8 matrix with 34 non-zero values, that is 4447 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4448 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4500 34 values. 4501 4502 When d_nnz, o_nnz parameters are specified, the storage is specified 4503 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4504 In the above case the values for d_nnz,o_nnz are 4505 .vb 4506 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4507 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4508 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4509 .ve 4510 Here the space allocated is sum of all the above values i.e 34, and 4511 hence pre-allocation is perfect. 4512 4513 Level: intermediate 4514 4515 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4516 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4517 @*/ 4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4519 { 4520 PetscMPIInt size; 4521 4522 PetscFunctionBegin; 4523 PetscCall(MatCreate(comm, A)); 4524 PetscCall(MatSetSizes(*A, m, n, M, N)); 4525 PetscCallMPI(MPI_Comm_size(comm, &size)); 4526 if (size > 1) { 4527 PetscCall(MatSetType(*A, MATMPIAIJ)); 4528 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4529 } else { 4530 PetscCall(MatSetType(*A, MATSEQAIJ)); 4531 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4532 } 4533 PetscFunctionReturn(0); 4534 } 4535 4536 /*@C 4537 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4538 4539 Not collective 4540 4541 Input Parameter: 4542 . A - The `MATMPIAIJ` matrix 4543 4544 Output Parameters: 4545 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4546 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4547 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4548 4549 Note: 4550 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4551 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4552 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4553 local column numbers to global column numbers in the original matrix. 4554 4555 Level: intermediate 4556 4557 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4558 @*/ 4559 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4560 { 4561 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4562 PetscBool flg; 4563 4564 PetscFunctionBegin; 4565 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4566 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4567 if (Ad) *Ad = a->A; 4568 if (Ao) *Ao = a->B; 4569 if (colmap) *colmap = a->garray; 4570 PetscFunctionReturn(0); 4571 } 4572 4573 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4574 { 4575 PetscInt m, N, i, rstart, nnz, Ii; 4576 PetscInt *indx; 4577 PetscScalar *values; 4578 MatType rootType; 4579 4580 PetscFunctionBegin; 4581 PetscCall(MatGetSize(inmat, &m, &N)); 4582 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4583 PetscInt *dnz, *onz, sum, bs, cbs; 4584 4585 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4586 /* Check sum(n) = N */ 4587 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4588 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != 
global columns %" PetscInt_FMT, sum, N); 4589 4590 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4591 rstart -= m; 4592 4593 MatPreallocateBegin(comm, m, n, dnz, onz); 4594 for (i = 0; i < m; i++) { 4595 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4596 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4597 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4598 } 4599 4600 PetscCall(MatCreate(comm, outmat)); 4601 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4602 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4603 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4604 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4605 PetscCall(MatSetType(*outmat, rootType)); 4606 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4607 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4608 MatPreallocateEnd(dnz, onz); 4609 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4610 } 4611 4612 /* numeric phase */ 4613 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4614 for (i = 0; i < m; i++) { 4615 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4616 Ii = i + rstart; 4617 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4618 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4619 } 4620 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4621 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4622 PetscFunctionReturn(0); 4623 } 4624 4625 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4626 { 4627 PetscMPIInt rank; 4628 PetscInt m, N, i, rstart, nnz; 4629 size_t len; 4630 const PetscInt *indx; 4631 PetscViewer out; 4632 char *name; 4633 Mat B; 4634 const PetscScalar *values; 4635 4636 PetscFunctionBegin; 4637 PetscCall(MatGetLocalSize(A, &m, NULL)); 4638 PetscCall(MatGetSize(A, NULL, &N)); 4639 /* Should this be the type of the diagonal block of A? 
*/ 4640 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4641 PetscCall(MatSetSizes(B, m, N, m, N)); 4642 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4643 PetscCall(MatSetType(B, MATSEQAIJ)); 4644 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4645 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4646 for (i = 0; i < m; i++) { 4647 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4648 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4649 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4650 } 4651 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4652 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4653 4654 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4655 PetscCall(PetscStrlen(outfile, &len)); 4656 PetscCall(PetscMalloc1(len + 6, &name)); 4657 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4658 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4659 PetscCall(PetscFree(name)); 4660 PetscCall(MatView(B, out)); 4661 PetscCall(PetscViewerDestroy(&out)); 4662 PetscCall(MatDestroy(&B)); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4667 { 4668 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4669 4670 PetscFunctionBegin; 4671 if (!merge) PetscFunctionReturn(0); 4672 PetscCall(PetscFree(merge->id_r)); 4673 PetscCall(PetscFree(merge->len_s)); 4674 PetscCall(PetscFree(merge->len_r)); 4675 PetscCall(PetscFree(merge->bi)); 4676 PetscCall(PetscFree(merge->bj)); 4677 PetscCall(PetscFree(merge->buf_ri[0])); 4678 PetscCall(PetscFree(merge->buf_ri)); 4679 PetscCall(PetscFree(merge->buf_rj[0])); 4680 PetscCall(PetscFree(merge->buf_rj)); 4681 PetscCall(PetscFree(merge->coi)); 4682 PetscCall(PetscFree(merge->coj)); 4683 PetscCall(PetscFree(merge->owners_co)); 4684 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4685 PetscCall(PetscFree(merge)); 4686 PetscFunctionReturn(0); 4687 } 4688 4689 
#include <../src/mat/utils/freespace.h> 4690 #include <petscbt.h> 4691 4692 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4693 { 4694 MPI_Comm comm; 4695 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4696 PetscMPIInt size, rank, taga, *len_s; 4697 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4698 PetscInt proc, m; 4699 PetscInt **buf_ri, **buf_rj; 4700 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4701 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4702 MPI_Request *s_waits, *r_waits; 4703 MPI_Status *status; 4704 const MatScalar *aa, *a_a; 4705 MatScalar **abuf_r, *ba_i; 4706 Mat_Merge_SeqsToMPI *merge; 4707 PetscContainer container; 4708 4709 PetscFunctionBegin; 4710 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4711 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4712 4713 PetscCallMPI(MPI_Comm_size(comm, &size)); 4714 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4715 4716 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4717 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4718 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4719 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4720 aa = a_a; 4721 4722 bi = merge->bi; 4723 bj = merge->bj; 4724 buf_ri = merge->buf_ri; 4725 buf_rj = merge->buf_rj; 4726 4727 PetscCall(PetscMalloc1(size, &status)); 4728 owners = merge->rowmap->range; 4729 len_s = merge->len_s; 4730 4731 /* send and recv matrix values */ 4732 /*-----------------------------*/ 4733 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4734 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4735 4736 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4737 for (proc = 0, k = 0; proc < size; proc++) { 4738 if (!len_s[proc]) continue; 4739 i = owners[proc]; 4740 
PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4741 k++; 4742 } 4743 4744 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4745 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4746 PetscCall(PetscFree(status)); 4747 4748 PetscCall(PetscFree(s_waits)); 4749 PetscCall(PetscFree(r_waits)); 4750 4751 /* insert mat values of mpimat */ 4752 /*----------------------------*/ 4753 PetscCall(PetscMalloc1(N, &ba_i)); 4754 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4755 4756 for (k = 0; k < merge->nrecv; k++) { 4757 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4758 nrows = *(buf_ri_k[k]); 4759 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4760 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4761 } 4762 4763 /* set values of ba */ 4764 m = merge->rowmap->n; 4765 for (i = 0; i < m; i++) { 4766 arow = owners[rank] + i; 4767 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4768 bnzi = bi[i + 1] - bi[i]; 4769 PetscCall(PetscArrayzero(ba_i, bnzi)); 4770 4771 /* add local non-zero vals of this proc's seqmat into ba */ 4772 anzi = ai[arow + 1] - ai[arow]; 4773 aj = a->j + ai[arow]; 4774 aa = a_a + ai[arow]; 4775 nextaj = 0; 4776 for (j = 0; nextaj < anzi; j++) { 4777 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4778 ba_i[j] += aa[nextaj++]; 4779 } 4780 } 4781 4782 /* add received vals into ba */ 4783 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4784 /* i-th row */ 4785 if (i == *nextrow[k]) { 4786 anzi = *(nextai[k] + 1) - *nextai[k]; 4787 aj = buf_rj[k] + *(nextai[k]); 4788 aa = abuf_r[k] + *(nextai[k]); 4789 nextaj = 0; 4790 for (j = 0; nextaj < anzi; j++) { 4791 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4792 ba_i[j] += aa[nextaj++]; 4793 } 4794 } 4795 nextrow[k]++; 
4796 nextai[k]++; 4797 } 4798 } 4799 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4800 } 4801 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4802 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4803 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4804 4805 PetscCall(PetscFree(abuf_r[0])); 4806 PetscCall(PetscFree(abuf_r)); 4807 PetscCall(PetscFree(ba_i)); 4808 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4809 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4810 PetscFunctionReturn(0); 4811 } 4812 4813 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4814 { 4815 Mat B_mpi; 4816 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4817 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4818 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4819 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4820 PetscInt len, proc, *dnz, *onz, bs, cbs; 4821 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4822 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4823 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4824 MPI_Status *status; 4825 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4826 PetscBT lnkbt; 4827 Mat_Merge_SeqsToMPI *merge; 4828 PetscContainer container; 4829 4830 PetscFunctionBegin; 4831 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4832 4833 /* make sure it is a PETSc comm */ 4834 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4835 PetscCallMPI(MPI_Comm_size(comm, &size)); 4836 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4837 4838 PetscCall(PetscNew(&merge)); 4839 PetscCall(PetscMalloc1(size, &status)); 4840 4841 /* determine row ownership */ 4842 /*---------------------------------------------------------*/ 4843 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4844 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4845 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4846 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4847 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4848 PetscCall(PetscMalloc1(size, &len_si)); 4849 PetscCall(PetscMalloc1(size, &merge->len_s)); 4850 4851 m = merge->rowmap->n; 4852 owners = merge->rowmap->range; 4853 4854 /* determine the number of messages to send, their lengths */ 4855 /*---------------------------------------------------------*/ 4856 len_s = merge->len_s; 4857 4858 len = 0; /* length of buf_si[] */ 4859 merge->nsend = 0; 4860 for (proc = 0; proc < size; proc++) { 4861 len_si[proc] = 0; 4862 if (proc == rank) { 4863 len_s[proc] = 0; 4864 } else { 4865 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4866 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4867 } 4868 if (len_s[proc]) { 4869 merge->nsend++; 4870 nrows = 0; 4871 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4872 if (ai[i + 1] > ai[i]) nrows++; 4873 } 4874 len_si[proc] = 2 * (nrows + 1); 4875 len += len_si[proc]; 4876 } 4877 } 4878 4879 /* determine the number and length of messages to receive for ij-structure */ 4880 /*-------------------------------------------------------------------------*/ 4881 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4882 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4883 4884 /* post the Irecv of j-structure */ 4885 /*-------------------------------*/ 4886 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4887 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4888 4889 /* post the Isend of j-structure */ 4890 /*--------------------------------*/ 4891 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4892 4893 for (proc = 0, k = 0; proc < size; proc++) { 4894 if (!len_s[proc]) continue; 4895 i = owners[proc]; 4896 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4897 k++; 4898 } 4899 4900 /* receives and sends of j-structure are complete */ 4901 /*------------------------------------------------*/ 4902 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4903 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4904 4905 /* send and recv i-structure */ 4906 /*---------------------------*/ 4907 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4908 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4909 4910 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4911 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4912 for (proc = 0, k = 0; proc < size; proc++) { 4913 if (!len_s[proc]) continue; 4914 /* form outgoing message for i-structure: 4915 buf_si[0]: nrows to be sent 4916 [1:nrows]: row index (global) 4917 [nrows+1:2*nrows+1]: i-structure index 4918 */ 4919 /*-------------------------------------------*/ 4920 nrows = len_si[proc] / 2 - 1; 4921 buf_si_i = buf_si + nrows + 1; 4922 buf_si[0] = nrows; 4923 buf_si_i[0] = 0; 4924 nrows = 0; 4925 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4926 anzi = ai[i + 1] - ai[i]; 4927 if (anzi) { 4928 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4929 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4930 nrows++; 4931 } 4932 } 4933 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4934 k++; 4935 buf_si += len_si[proc]; 4936 } 4937 4938 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4939 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4940 4941 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4942 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); 4943 4944 PetscCall(PetscFree(len_si)); 4945 PetscCall(PetscFree(len_ri)); 4946 PetscCall(PetscFree(rj_waits)); 4947 PetscCall(PetscFree2(si_waits, sj_waits)); 4948 PetscCall(PetscFree(ri_waits)); 4949 PetscCall(PetscFree(buf_s)); 4950 PetscCall(PetscFree(status)); 4951 4952 /* compute a local seq matrix in each processor */ 4953 /*----------------------------------------------*/ 4954 /* allocate bi array and free space for accumulating nonzero column info */ 4955 PetscCall(PetscMalloc1(m + 1, &bi)); 4956 bi[0] = 0; 4957 4958 /* create and initialize a linked list */ 4959 nlnk = N + 1; 4960 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4961 4962 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4963 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4964 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4965 4966 current_space = free_space; 4967 4968 /* determine symbolic info for each local row */ 4969 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4970 4971 for (k = 0; k < merge->nrecv; k++) { 4972 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4973 nrows = *buf_ri_k[k]; 4974 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4975 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4976 } 4977 4978 MatPreallocateBegin(comm, m, n, dnz, onz); 4979 len = 0; 4980 for (i = 0; i < m; i++) { 4981 bnzi = 0; 4982 /* add local non-zero cols of this proc's seqmat into lnk */ 4983 arow = owners[rank] + i; 4984 anzi = ai[arow + 1] - ai[arow]; 4985 aj = a->j + ai[arow]; 4986 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4987 bnzi += nlnk; 4988 /* add received col data into lnk */ 4989 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4990 if (i == *nextrow[k]) { /* i-th row */ 4991 anzi = *(nextai[k] + 1) - *nextai[k]; 4992 aj = buf_rj[k] + 
*nextai[k]; 4993 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4994 bnzi += nlnk; 4995 nextrow[k]++; 4996 nextai[k]++; 4997 } 4998 } 4999 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5000 5001 /* if free space is not available, make more free space */ 5002 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5003 /* copy data into free space, then initialize lnk */ 5004 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5005 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5006 5007 current_space->array += bnzi; 5008 current_space->local_used += bnzi; 5009 current_space->local_remaining -= bnzi; 5010 5011 bi[i + 1] = bi[i] + bnzi; 5012 } 5013 5014 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5015 5016 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5017 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5018 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5019 5020 /* create symbolic parallel matrix B_mpi */ 5021 /*---------------------------------------*/ 5022 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5023 PetscCall(MatCreate(comm, &B_mpi)); 5024 if (n == PETSC_DECIDE) { 5025 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5026 } else { 5027 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5028 } 5029 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5030 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5031 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5032 MatPreallocateEnd(dnz, onz); 5033 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5034 5035 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5036 B_mpi->assembled = PETSC_FALSE; 5037 merge->bi = bi; 5038 merge->bj = bj; 5039 merge->buf_ri = buf_ri; 5040 merge->buf_rj = buf_rj; 5041 merge->coi = NULL; 5042 merge->coj = NULL; 5043 merge->owners_co = 
NULL; 5044 5045 PetscCall(PetscCommDestroy(&comm)); 5046 5047 /* attach the supporting struct to B_mpi for reuse */ 5048 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5049 PetscCall(PetscContainerSetPointer(container, merge)); 5050 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5051 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5052 PetscCall(PetscContainerDestroy(&container)); 5053 *mpimat = B_mpi; 5054 5055 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5056 PetscFunctionReturn(0); 5057 } 5058 5059 /*@C 5060 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5061 matrices from each processor 5062 5063 Collective 5064 5065 Input Parameters: 5066 + comm - the communicators the parallel matrix will live on 5067 . seqmat - the input sequential matrices 5068 . m - number of local rows (or `PETSC_DECIDE`) 5069 . n - number of local columns (or `PETSC_DECIDE`) 5070 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5071 5072 Output Parameter: 5073 . mpimat - the parallel matrix generated 5074 5075 Level: advanced 5076 5077 Note: 5078 The dimensions of the sequential matrix in each processor MUST be the same. 5079 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5080 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5081 @*/ 5082 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5083 { 5084 PetscMPIInt size; 5085 5086 PetscFunctionBegin; 5087 PetscCallMPI(MPI_Comm_size(comm, &size)); 5088 if (size == 1) { 5089 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5090 if (scall == MAT_INITIAL_MATRIX) { 5091 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5092 } else { 5093 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5094 } 5095 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5096 PetscFunctionReturn(0); 5097 } 5098 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5099 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5100 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5101 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5102 PetscFunctionReturn(0); 5103 } 5104 5105 /*@ 5106 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5107 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5108 with `MatGetSize()` 5109 5110 Not Collective 5111 5112 Input Parameters: 5113 + A - the matrix 5114 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5115 5116 Output Parameter: 5117 . A_loc - the local sequential matrix generated 5118 5119 Level: developer 5120 5121 Notes: 5122 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5123 5124 Destroy the matrix with `MatDestroy()` 5125 5126 .seealso: `MatMPIAIJGetLocalMat()` 5127 @*/ 5128 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5129 { 5130 PetscBool mpi; 5131 5132 PetscFunctionBegin; 5133 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5134 if (mpi) { 5135 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5136 } else { 5137 *A_loc = A; 5138 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5139 } 5140 PetscFunctionReturn(0); 5141 } 5142 5143 /*@ 5144 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5145 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5146 with `MatGetSize()` 5147 5148 Not Collective 5149 5150 Input Parameters: 5151 + A - the matrix 5152 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5153 5154 Output Parameter: 5155 . A_loc - the local sequential matrix generated 5156 5157 Level: developer 5158 5159 Notes: 5160 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5161 5162 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5163 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5164 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5165 modify the values of the returned A_loc. 
.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Accept any subtype whose type name starts with "mpiaij" (e.g. GPU subclasses) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Single rank: the diagonal block IS the whole matrix; reference or copy it */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  /* a = diagonal block (local column numbering starting at 0),
     b = off-diagonal block (compressed columns; cmap maps them to global indices) */
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are advancing cursors over the value arrays; aav/bav keep the originals
     so the arrays can be restored below */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row i of the result has all diagonal-block entries plus all off-diagonal
       entries, merged so global column indices stay ascending:
       (off-diag cols < cstart) then (diag cols, shifted by cstart) then (off-diag cols > owned range) */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns left of the owned block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local columns shifted to global by cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (remaining columns, right of the owned block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure already exists: rewrite values only, in the same merged order used
       in the MAT_INITIAL_MATRIX branch above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into
a sequential matrix with 5276 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5277 5278 Not Collective 5279 5280 Input Parameters: 5281 + A - the matrix 5282 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5283 5284 Output Parameters: 5285 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5286 - A_loc - the local sequential matrix generated 5287 5288 Level: developer 5289 5290 Note: 5291 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5292 5293 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5294 @*/ 5295 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5296 { 5297 Mat Ao, Ad; 5298 const PetscInt *cmap; 5299 PetscMPIInt size; 5300 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5301 5302 PetscFunctionBegin; 5303 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5304 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5305 if (size == 1) { 5306 if (scall == MAT_INITIAL_MATRIX) { 5307 PetscCall(PetscObjectReference((PetscObject)Ad)); 5308 *A_loc = Ad; 5309 } else if (scall == MAT_REUSE_MATRIX) { 5310 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5311 } 5312 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5313 PetscFunctionReturn(0); 5314 } 5315 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5316 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5317 if (f) { 5318 PetscCall((*f)(A, scall, glob, A_loc)); 5319 } else { 5320 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5321 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5322 Mat_SeqAIJ 
*c; 5323 PetscInt *ai = a->i, *aj = a->j; 5324 PetscInt *bi = b->i, *bj = b->j; 5325 PetscInt *ci, *cj; 5326 const PetscScalar *aa, *ba; 5327 PetscScalar *ca; 5328 PetscInt i, j, am, dn, on; 5329 5330 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5331 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5332 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5333 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5334 if (scall == MAT_INITIAL_MATRIX) { 5335 PetscInt k; 5336 PetscCall(PetscMalloc1(1 + am, &ci)); 5337 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5338 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5339 ci[0] = 0; 5340 for (i = 0, k = 0; i < am; i++) { 5341 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5342 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5343 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5344 /* diagonal portion of A */ 5345 for (j = 0; j < ncols_d; j++, k++) { 5346 cj[k] = *aj++; 5347 ca[k] = *aa++; 5348 } 5349 /* off-diagonal portion of A */ 5350 for (j = 0; j < ncols_o; j++, k++) { 5351 cj[k] = dn + *bj++; 5352 ca[k] = *ba++; 5353 } 5354 } 5355 /* put together the new matrix */ 5356 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5357 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5358 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5359 c = (Mat_SeqAIJ *)(*A_loc)->data; 5360 c->free_a = PETSC_TRUE; 5361 c->free_ij = PETSC_TRUE; 5362 c->nonew = 0; 5363 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5364 } else if (scall == MAT_REUSE_MATRIX) { 5365 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5366 for (i = 0; i < am; i++) { 5367 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5368 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5369 /* diagonal portion of A */ 5370 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5371 /* off-diagonal portion of A */ 5372 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5373 } 5374 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5375 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5376 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5377 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5378 if (glob) { 5379 PetscInt cst, *gidx; 5380 5381 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5382 PetscCall(PetscMalloc1(dn + on, &gidx)); 5383 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5384 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5385 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5386 } 5387 } 5388 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5389 PetscFunctionReturn(0); 5390 } 5391 5392 /*@C 5393 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5394 5395 Not Collective 5396 5397 Input Parameters: 5398 + A - the matrix 5399 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5400 - row, col - index sets of rows and columns to extract (or NULL) 5401 5402 Output Parameter: 5403 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the columns with local nonzeros, in ascending global order:
       garray entries below the owned range, then the owned columns, then the rest of
       garray (garray is assumed sorted here — see the break below) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an existing Mat array on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is
 * based on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* Per owned row, record (diag count, offdiag count) pairs and their running offsets */
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we have the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Second pair of SFs: one leaf per nonzero entry, to pull the actual column
     indices and values row-by-row from the owning ranks */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (temporarily modifies pd->j in place;
     shifted back below after the Bcast has been completed) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Undo the in-place globalization of po->j as well */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; dof > 1 (MAIJ) collapses each
       group of dof consecutive columns to a single key */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash-map keys come out unordered; sort to get ascending row indices */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* the product A*B requires A's column layout to match B's row layout locally */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* build row IS of B = the nonzero columns of local A, in ascending global
       order: garray entries below the owned range, the owned columns, then the
       remaining garray entries (garray assumed sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(0);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5803 5804 Level: developer 5805 5806 */ 5807 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5808 { 5809 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5810 Mat_SeqAIJ *b_oth; 5811 VecScatter ctx; 5812 MPI_Comm comm; 5813 const PetscMPIInt *rprocs, *sprocs; 5814 const PetscInt *srow, *rstarts, *sstarts; 5815 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5816 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5817 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5818 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5819 PetscMPIInt size, tag, rank, nreqs; 5820 5821 PetscFunctionBegin; 5822 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5823 PetscCallMPI(MPI_Comm_size(comm, &size)); 5824 5825 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5826 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5827 } 5828 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5829 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5830 5831 if (size == 1) { 5832 startsj_s = NULL; 5833 bufa_ptr = NULL; 5834 *B_oth = NULL; 5835 PetscFunctionReturn(0); 5836 } 5837 5838 ctx = a->Mvctx; 5839 tag = ((PetscObject)ctx)->tag; 5840 5841 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5842 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5843 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5844 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5845 PetscCall(PetscMalloc1(nreqs, &reqs)); 5846 rwaits = reqs; 5847 swaits = reqs + nrecvs; 5848 5849 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5850 if (scall == MAT_INITIAL_MATRIX) { 5851 /* i-array */ 5852 /*---------*/ 5853 /* post receives */ 5854 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5855 for (i = 0; i < nrecvs; i++) { 5856 rowlen = rvalues + rstarts[i] * rbs; 5857 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5858 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5859 } 5860 5861 /* pack the outgoing message */ 5862 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5863 5864 sstartsj[0] = 0; 5865 rstartsj[0] = 0; 5866 len = 0; /* total length of j or a array to be sent */ 5867 if (nsends) { 5868 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5869 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5870 } 5871 for (i = 0; i < nsends; i++) { 5872 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5873 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5874 for (j = 0; j < nrows; j++) { 5875 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5876 for (l = 0; l < sbs; l++) { 5877 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5878 5879 rowlen[j * sbs + l] = ncols; 5880 5881 len += ncols; 5882 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5883 } 5884 k++; 5885 } 5886 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5887 5888 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5889 } 5890 /* recvs and sends of i-array are completed */ 5891 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5892 
PetscCall(PetscFree(svalues)); 5893 5894 /* allocate buffers for sending j and a arrays */ 5895 PetscCall(PetscMalloc1(len + 1, &bufj)); 5896 PetscCall(PetscMalloc1(len + 1, &bufa)); 5897 5898 /* create i-array of B_oth */ 5899 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5900 5901 b_othi[0] = 0; 5902 len = 0; /* total length of j or a array to be received */ 5903 k = 0; 5904 for (i = 0; i < nrecvs; i++) { 5905 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5906 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5907 for (j = 0; j < nrows; j++) { 5908 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5909 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5910 k++; 5911 } 5912 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5913 } 5914 PetscCall(PetscFree(rvalues)); 5915 5916 /* allocate space for j and a arrays of B_oth */ 5917 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5918 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5919 5920 /* j-array */ 5921 /*---------*/ 5922 /* post receives of j-array */ 5923 for (i = 0; i < nrecvs; i++) { 5924 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5925 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5926 } 5927 5928 /* pack the outgoing message j-array */ 5929 if (nsends) k = sstarts[0]; 5930 for (i = 0; i < nsends; i++) { 5931 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5932 bufJ = bufj + sstartsj[i]; 5933 for (j = 0; j < nrows; j++) { 5934 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5935 for (ll = 0; ll < sbs; ll++) { 5936 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5937 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5938 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5939 } 5940 } 5941 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5942 } 
5943 5944 /* recvs and sends of j-array are completed */ 5945 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5946 } else if (scall == MAT_REUSE_MATRIX) { 5947 sstartsj = *startsj_s; 5948 rstartsj = *startsj_r; 5949 bufa = *bufa_ptr; 5950 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5951 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5952 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5953 5954 /* a-array */ 5955 /*---------*/ 5956 /* post receives of a-array */ 5957 for (i = 0; i < nrecvs; i++) { 5958 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5959 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5960 } 5961 5962 /* pack the outgoing message a-array */ 5963 if (nsends) k = sstarts[0]; 5964 for (i = 0; i < nsends; i++) { 5965 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5966 bufA = bufa + sstartsj[i]; 5967 for (j = 0; j < nrows; j++) { 5968 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5969 for (ll = 0; ll < sbs; ll++) { 5970 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5971 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5972 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5973 } 5974 } 5975 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5976 } 5977 /* recvs and sends of a-array are completed */ 5978 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5979 PetscCall(PetscFree(reqs)); 5980 5981 if (scall == MAT_INITIAL_MATRIX) { 5982 /* put together the new matrix */ 5983 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5984 5985 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5986 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5987 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5988 b_oth->free_a = PETSC_TRUE; 5989 b_oth->free_ij = PETSC_TRUE; 5990 b_oth->nonew = 0; 5991 5992 PetscCall(PetscFree(bufj)); 5993 if (!startsj_s || !bufa_ptr) { 5994 PetscCall(PetscFree2(sstartsj, rstartsj)); 5995 PetscCall(PetscFree(bufa_ptr)); 5996 } else { 5997 *startsj_s = sstartsj; 5998 *startsj_r = rstartsj; 5999 *bufa_ptr = bufa; 6000 } 6001 } else if (scall == MAT_REUSE_MATRIX) { 6002 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6003 } 6004 6005 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6006 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6007 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6008 PetscFunctionReturn(0); 6009 } 6010 6011 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6013 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6014 #if defined(PETSC_HAVE_MKL_SPARSE) 6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6016 #endif 6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6018 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6019 #if defined(PETSC_HAVE_ELEMENTAL) 6020 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6021 #endif 6022 #if defined(PETSC_HAVE_SCALAPACK) 6023 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6024 #endif 6025 #if defined(PETSC_HAVE_HYPRE) 6026 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6027 #endif 6028 #if defined(PETSC_HAVE_CUDA) 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat 
*);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                       p                          p
        [       ]       [       ]         [         ]
      m [   A   ]  *  n [   B   ]   =   m [    C    ]
        [       ]       [       ]         [         ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  /* Ct = B' * A' = (A * B)' */
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Transpose Ct into the already-created C (reuse path requires a precursor) */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A*B with A MPIDense and B MPIAIJ: sizes/type of C are set here,
   the numeric kernel above is installed for the numeric phase */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already some dense type; otherwise inherit A's (dense) type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* Product setup for C = A*B (MPIDense * MPIAIJ): validates layouts and installs the symbolic kernels */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

/* Dispatch for MPIDense*MPIAIJ products: only the AB product type is supported */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(0);
}

/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge on sorted column indices; equal indices collapse into one output nonzero */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort the row's columns (shifted diag entries come first) and keep perm[] in sync */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices while scanning repeats */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}

/* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
   then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Entries of the merged matrix before imap[0] have no contribution from this set */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(0);
}

PetscErrorCode
MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6363 { 6364 MPI_Comm comm; 6365 PetscMPIInt rank, size; 6366 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6367 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6368 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6369 6370 PetscFunctionBegin; 6371 PetscCall(PetscFree(mpiaij->garray)); 6372 PetscCall(VecDestroy(&mpiaij->lvec)); 6373 #if defined(PETSC_USE_CTABLE) 6374 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6375 #else 6376 PetscCall(PetscFree(mpiaij->colmap)); 6377 #endif 6378 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6379 mat->assembled = PETSC_FALSE; 6380 mat->was_assembled = PETSC_FALSE; 6381 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6382 6383 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6384 PetscCallMPI(MPI_Comm_size(comm, &size)); 6385 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6386 PetscCall(PetscLayoutSetUp(mat->rmap)); 6387 PetscCall(PetscLayoutSetUp(mat->cmap)); 6388 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6389 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6390 PetscCall(MatGetLocalSize(mat, &m, &n)); 6391 PetscCall(MatGetSize(mat, &M, &N)); 6392 6393 /* ---------------------------------------------------------------------------*/ 6394 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6395 /* entries come first, then local rows, then remote rows. 
*/ 6396 /* ---------------------------------------------------------------------------*/ 6397 PetscCount n1 = coo_n, *perm1; 6398 PetscInt *i1 = coo_i, *j1 = coo_j; 6399 6400 PetscCall(PetscMalloc1(n1, &perm1)); 6401 for (k = 0; k < n1; k++) perm1[k] = k; 6402 6403 /* Manipulate indices so that entries with negative row or col indices will have smallest 6404 row indices, local entries will have greater but negative row indices, and remote entries 6405 will have positive row indices. 6406 */ 6407 for (k = 0; k < n1; k++) { 6408 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6409 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6410 else { 6411 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6412 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6413 } 6414 } 6415 6416 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6417 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6418 for (k = 0; k < n1; k++) { 6419 if (i1[k] > PETSC_MIN_INT) break; 6420 } /* Advance k to the first entry we need to take care of */ 6421 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6422 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6423 6424 /* ---------------------------------------------------------------------------*/ 6425 /* Split local rows into diag/offdiag portions */ 6426 /* ---------------------------------------------------------------------------*/ 6427 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6428 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6429 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6430 
6431 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6432 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6433 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6434 6435 /* ---------------------------------------------------------------------------*/ 6436 /* Send remote rows to their owner */ 6437 /* ---------------------------------------------------------------------------*/ 6438 /* Find which rows should be sent to which remote ranks*/ 6439 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6440 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6441 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6442 const PetscInt *ranges; 6443 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6444 6445 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6446 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6447 for (k = rem; k < n1;) { 6448 PetscMPIInt owner; 6449 PetscInt firstRow, lastRow; 6450 6451 /* Locate a row range */ 6452 firstRow = i1[k]; /* first row of this owner */ 6453 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6454 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6455 6456 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6457 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6458 6459 /* All entries in [k,p) belong to this remote owner */ 6460 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6461 PetscMPIInt *sendto2; 6462 PetscInt *nentries2; 6463 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6464 6465 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6466 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6467 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6468 PetscCall(PetscFree2(sendto, nentries2)); 6469 sendto = sendto2; 6470 nentries = nentries2; 6471 maxNsend = maxNsend2; 6472 } 6473 sendto[nsend] = owner; 6474 nentries[nsend] = p - k; 6475 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6476 nsend++; 6477 k = p; 6478 } 6479 6480 /* Build 1st SF to know offsets on remote to send data */ 6481 PetscSF sf1; 6482 PetscInt nroots = 1, nroots2 = 0; 6483 PetscInt nleaves = nsend, nleaves2 = 0; 6484 PetscInt *offsets; 6485 PetscSFNode *iremote; 6486 6487 PetscCall(PetscSFCreate(comm, &sf1)); 6488 PetscCall(PetscMalloc1(nsend, &iremote)); 6489 PetscCall(PetscMalloc1(nsend, &offsets)); 6490 for (k = 0; k < nsend; k++) { 6491 iremote[k].rank = sendto[k]; 6492 iremote[k].index = 0; 6493 nleaves2 += nentries[k]; 6494 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6495 } 6496 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6497 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6498 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6499 PetscCall(PetscSFDestroy(&sf1)); 6500 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6501 6502 /* Build 2nd SF to send remote COOs to their owner */ 6503 PetscSF sf2; 6504 nroots = nroots2; 6505 nleaves = nleaves2; 6506 PetscCall(PetscSFCreate(comm, &sf2)); 6507 
PetscCall(PetscSFSetFromOptions(sf2)); 6508 PetscCall(PetscMalloc1(nleaves, &iremote)); 6509 p = 0; 6510 for (k = 0; k < nsend; k++) { 6511 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6512 for (q = 0; q < nentries[k]; q++, p++) { 6513 iremote[p].rank = sendto[k]; 6514 iremote[p].index = offsets[k] + q; 6515 } 6516 } 6517 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6518 6519 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6520 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6521 6522 /* Send the remote COOs to their owner */ 6523 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6524 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6525 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6526 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6527 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6528 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6529 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6530 6531 PetscCall(PetscFree(offsets)); 6532 PetscCall(PetscFree2(sendto, nentries)); 6533 6534 /* ---------------------------------------------------------------*/ 6535 /* Sort received COOs by row along with the permutation array */ 6536 /* ---------------------------------------------------------------*/ 6537 for (k = 0; k < n2; k++) perm2[k] = k; 6538 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6539 6540 /* ---------------------------------------------------------------*/ 6541 /* 
Split received COOs into diag/offdiag portions */ 6542 /* ---------------------------------------------------------------*/ 6543 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6544 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6545 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6546 6547 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6548 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6549 6550 /* --------------------------------------------------------------------------*/ 6551 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6552 /* --------------------------------------------------------------------------*/ 6553 PetscInt *Ai, *Bi; 6554 PetscInt *Aj, *Bj; 6555 6556 PetscCall(PetscMalloc1(m + 1, &Ai)); 6557 PetscCall(PetscMalloc1(m + 1, &Bi)); 6558 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6559 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6560 6561 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6562 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6563 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6564 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6565 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6566 6567 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6568 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6569 6570 /* --------------------------------------------------------------------------*/ 6571 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6572 /* expect nonzeros in A/B most likely have local contributing entries */ 6573 /* --------------------------------------------------------------------------*/ 6574 PetscInt Annz = Ai[m]; 6575 PetscInt Bnnz = Bi[m]; 6576 
PetscCount *Ajmap1_new, *Bjmap1_new; 6577 6578 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6579 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6580 6581 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6582 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6583 6584 PetscCall(PetscFree(Aimap1)); 6585 PetscCall(PetscFree(Ajmap1)); 6586 PetscCall(PetscFree(Bimap1)); 6587 PetscCall(PetscFree(Bjmap1)); 6588 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6589 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6590 PetscCall(PetscFree(perm1)); 6591 PetscCall(PetscFree3(i2, j2, perm2)); 6592 6593 Ajmap1 = Ajmap1_new; 6594 Bjmap1 = Bjmap1_new; 6595 6596 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6597 if (Annz < Annz1 + Annz2) { 6598 PetscInt *Aj_new; 6599 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6600 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6601 PetscCall(PetscFree(Aj)); 6602 Aj = Aj_new; 6603 } 6604 6605 if (Bnnz < Bnnz1 + Bnnz2) { 6606 PetscInt *Bj_new; 6607 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6608 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6609 PetscCall(PetscFree(Bj)); 6610 Bj = Bj_new; 6611 } 6612 6613 /* --------------------------------------------------------------------------------*/ 6614 /* Create new submatrices for on-process and off-process coupling */ 6615 /* --------------------------------------------------------------------------------*/ 6616 PetscScalar *Aa, *Ba; 6617 MatType rtype; 6618 Mat_SeqAIJ *a, *b; 6619 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6620 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6621 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6622 if (cstart) { 6623 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6624 } 6625 PetscCall(MatDestroy(&mpiaij->A)); 6626 PetscCall(MatDestroy(&mpiaij->B)); 6627 PetscCall(MatGetRootType_Private(mat, &rtype)); 6628 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6629 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6630 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6631 6632 a = (Mat_SeqAIJ *)mpiaij->A->data; 6633 b = (Mat_SeqAIJ *)mpiaij->B->data; 6634 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6635 a->free_a = b->free_a = PETSC_TRUE; 6636 a->free_ij = b->free_ij = PETSC_TRUE; 6637 6638 /* conversion must happen AFTER multiply setup */ 6639 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6640 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6641 PetscCall(VecDestroy(&mpiaij->lvec)); 6642 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6643 6644 mpiaij->coo_n = coo_n; 6645 mpiaij->coo_sf = sf2; 6646 mpiaij->sendlen = nleaves; 6647 mpiaij->recvlen = nroots; 6648 6649 mpiaij->Annz = Annz; 6650 mpiaij->Bnnz = Bnnz; 6651 6652 mpiaij->Annz2 = Annz2; 6653 mpiaij->Bnnz2 = Bnnz2; 6654 6655 mpiaij->Atot1 = Atot1; 6656 mpiaij->Atot2 = Atot2; 6657 mpiaij->Btot1 = Btot1; 6658 mpiaij->Btot2 = Btot2; 6659 6660 mpiaij->Ajmap1 = Ajmap1; 6661 mpiaij->Aperm1 = Aperm1; 6662 6663 mpiaij->Bjmap1 = Bjmap1; 6664 mpiaij->Bperm1 = Bperm1; 6665 6666 mpiaij->Aimap2 = Aimap2; 6667 mpiaij->Ajmap2 = Ajmap2; 6668 mpiaij->Aperm2 = Aperm2; 6669 6670 mpiaij->Bimap2 = Bimap2; 6671 mpiaij->Bjmap2 = Bjmap2; 6672 mpiaij->Bperm2 = Bperm2; 6673 6674 mpiaij->Cperm1 = Cperm1; 6675 6676 /* Allocate in preallocation. 
     If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* Insert (or add, per imode) the COO values v[] into the diagonal block A and the
   off-diagonal block B, using the maps (Ajmap/Bjmap/Aperm/Bperm/Cperm) built by
   MatSetPreallocationCOO_MPIAIJ(). Remote entries are packed with Cperm1, shipped
   to their owner over coo_sf, and folded in after the local sums. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B: Ajmap1[i]..Ajmap1[i+1] indexes the run of local COO
     entries (via Aperm1) that all land on nonzero i of A */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i] maps the i-th remotely-contributed
     nonzero to its location in Aa[] (always ADD, remote contributions accumulate) */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
    `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
   In this no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Type constructor registered for MATMPIAIJ: allocates the Mat_MPIAIJ data structure,
   installs the operations table, creates the stash used to cache off-process entries,
   and composes the type-specific and conversion functions queried elsewhere by name. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* type-specific methods looked up by name via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* conversions to other formats; device/back-end variants are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  /* matrix products and COO assembly support */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6847 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6848 . a - matrix values 6849 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6850 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6851 - oa - matrix values 6852 6853 Output Parameter: 6854 . mat - the matrix 6855 6856 Level: advanced 6857 6858 Notes: 6859 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6860 must free the arrays once the matrix has been destroyed and not before. 6861 6862 The i and j indices are 0 based 6863 6864 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6865 6866 This sets local rows and cannot be used to set off-processor values. 6867 6868 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6869 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6870 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6871 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6872 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6873 communication if it is known that only local entries will be set. 
6874 6875 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6876 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6877 @*/ 6878 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6879 { 6880 Mat_MPIAIJ *maij; 6881 6882 PetscFunctionBegin; 6883 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6884 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6885 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6886 PetscCall(MatCreate(comm, mat)); 6887 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6888 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6889 maij = (Mat_MPIAIJ *)(*mat)->data; 6890 6891 (*mat)->preallocated = PETSC_TRUE; 6892 6893 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6894 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6895 6896 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6897 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6898 6899 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6900 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6901 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6902 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6903 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6904 PetscFunctionReturn(0); 6905 } 6906 6907 typedef struct { 6908 Mat *mp; /* intermediate products */ 6909 PetscBool *mptmp; /* is the intermediate product temporary ? 
 */
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor attached to the Mat_Product: releases every intermediate matrix,
   the COO index/value buffers, and the SF used for off-process insertion.
   Note own[0]/off[0] hold single aggregate allocations that back all own[i]/off[i]. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with a memory type, so free them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-aware) implementation if the Mat provides one */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    /* Host fallback: gather (or plain copy when idx is NULL) from the raw value array */
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend MatProduct: refresh the temporary matrices (unless the
   symbolic result is being reused), run the numeric op of each intermediate product,
   then scatter their values into the COO buffers (coo_v on-process, coo_w off-process)
   and finish with a single MatSetValuesCOO() on C. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* symbolic data can only be reused on the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track the running offsets into the on-process/off-process COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later intermediates, not C */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process split for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7051 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7052 /* a base offset; type-2: sparse with a local to global map table */ 7053 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7054 7055 MatProductType ptype; 7056 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7057 PetscMPIInt size; 7058 7059 PetscFunctionBegin; 7060 MatCheckProduct(C, 1); 7061 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7062 ptype = product->type; 7063 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7064 ptype = MATPRODUCT_AB; 7065 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7066 } 7067 switch (ptype) { 7068 case MATPRODUCT_AB: 7069 A = product->A; 7070 P = product->B; 7071 m = A->rmap->n; 7072 n = P->cmap->n; 7073 M = A->rmap->N; 7074 N = P->cmap->N; 7075 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7076 break; 7077 case MATPRODUCT_AtB: 7078 P = product->A; 7079 A = product->B; 7080 m = P->cmap->n; 7081 n = A->cmap->n; 7082 M = P->cmap->N; 7083 N = A->cmap->N; 7084 hasoffproc = PETSC_TRUE; 7085 break; 7086 case MATPRODUCT_PtAP: 7087 A = product->A; 7088 P = product->B; 7089 m = P->cmap->n; 7090 n = P->cmap->n; 7091 M = P->cmap->N; 7092 N = P->cmap->N; 7093 hasoffproc = PETSC_TRUE; 7094 break; 7095 default: 7096 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7097 } 7098 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7099 if (size == 1) hasoffproc = PETSC_FALSE; 7100 7101 /* defaults */ 7102 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7103 mp[i] = NULL; 7104 mptmp[i] = PETSC_FALSE; 7105 rmapt[i] = -1; 7106 cmapt[i] = -1; 7107 rmapa[i] = NULL; 7108 cmapa[i] = NULL; 7109 } 7110 7111 /* customization */ 
7112 PetscCall(PetscNew(&mmdata)); 7113 mmdata->reusesym = product->api_user; 7114 if (ptype == MATPRODUCT_AB) { 7115 if (product->api_user) { 7116 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7117 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7118 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7119 PetscOptionsEnd(); 7120 } else { 7121 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7122 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7123 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7124 PetscOptionsEnd(); 7125 } 7126 } else if (ptype == MATPRODUCT_PtAP) { 7127 if (product->api_user) { 7128 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7129 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7130 PetscOptionsEnd(); 7131 } else { 7132 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7133 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7134 PetscOptionsEnd(); 7135 } 7136 } 7137 a = (Mat_MPIAIJ *)A->data; 7138 p = (Mat_MPIAIJ *)P->data; 7139 PetscCall(MatSetSizes(C, m, n, M, N)); 7140 PetscCall(PetscLayoutSetUp(C->rmap)); 7141 PetscCall(PetscLayoutSetUp(C->cmap)); 7142 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7143 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7144 7145 cp = 0; 7146 switch (ptype) { 7147 case MATPRODUCT_AB: /* A * P */ 7148 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7149 7150 /* A_diag * P_local (merged or not) */ 7151 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7152 /* P is product->B */ 7153 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7154 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7155 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7156 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7157 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7158 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7159 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7160 mp[cp]->product->api_user = product->api_user; 7161 PetscCall(MatProductSetFromOptions(mp[cp])); 7162 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7163 PetscCall(ISGetIndices(glob, &globidx)); 7164 rmapt[cp] = 1; 7165 cmapt[cp] = 2; 7166 cmapa[cp] = globidx; 7167 mptmp[cp] = PETSC_FALSE; 7168 cp++; 7169 } else { /* A_diag * P_diag and A_diag * P_off */ 7170 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7171 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7172 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7173 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7174 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7175 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7176 mp[cp]->product->api_user = product->api_user; 7177 PetscCall(MatProductSetFromOptions(mp[cp])); 7178 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7179 rmapt[cp] = 1; 7180 cmapt[cp] = 1; 7181 mptmp[cp] = PETSC_FALSE; 7182 cp++; 7183 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7184 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7185 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7186 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7187 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7188 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7189 mp[cp]->product->api_user = product->api_user; 7190 PetscCall(MatProductSetFromOptions(mp[cp])); 7191 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7192 rmapt[cp] = 1; 7193 cmapt[cp] = 2; 7194 cmapa[cp] = p->garray; 7195 mptmp[cp] = PETSC_FALSE; 7196 cp++; 7197 } 7198 7199 /* A_off * P_other */ 7200 if (mmdata->P_oth) { 7201 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7202 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7203 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7204 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7205 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7206 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7207 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7208 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7209 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7210 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7211 mp[cp]->product->api_user = product->api_user; 7212 PetscCall(MatProductSetFromOptions(mp[cp])); 7213 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7214 rmapt[cp] = 1; 7215 cmapt[cp] = 2; 7216 cmapa[cp] = P_oth_idx; 7217 mptmp[cp] = PETSC_FALSE; 7218 cp++; 7219 } 7220 break; 7221 7222 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7223 /* A is product->B */ 7224 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7225 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7226 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7227 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7228 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7229 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7230 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7231 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7232 mp[cp]->product->api_user = product->api_user; 7233 PetscCall(MatProductSetFromOptions(mp[cp])); 7234 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7235 PetscCall(ISGetIndices(glob, &globidx)); 7236 rmapt[cp] = 2; 7237 rmapa[cp] = globidx; 7238 cmapt[cp] = 2; 7239 cmapa[cp] = globidx; 7240 mptmp[cp] = PETSC_FALSE; 7241 cp++; 7242 } else { 7243 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7244 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7245 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7246 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7247 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7248 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7249 mp[cp]->product->api_user = product->api_user; 7250 PetscCall(MatProductSetFromOptions(mp[cp])); 7251 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7252 PetscCall(ISGetIndices(glob, &globidx)); 7253 rmapt[cp] = 1; 7254 cmapt[cp] = 2; 7255 cmapa[cp] = globidx; 7256 mptmp[cp] = PETSC_FALSE; 7257 cp++; 7258 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7259 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7260 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7261 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7262 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7263 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7264 mp[cp]->product->api_user = product->api_user; 7265 PetscCall(MatProductSetFromOptions(mp[cp])); 7266 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7267 rmapt[cp] = 2; 7268 rmapa[cp] = p->garray; 
7269 cmapt[cp] = 2; 7270 cmapa[cp] = globidx; 7271 mptmp[cp] = PETSC_FALSE; 7272 cp++; 7273 } 7274 break; 7275 case MATPRODUCT_PtAP: 7276 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7277 /* P is product->B */ 7278 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7279 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7280 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7281 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7282 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7283 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7284 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7285 mp[cp]->product->api_user = product->api_user; 7286 PetscCall(MatProductSetFromOptions(mp[cp])); 7287 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7288 PetscCall(ISGetIndices(glob, &globidx)); 7289 rmapt[cp] = 2; 7290 rmapa[cp] = globidx; 7291 cmapt[cp] = 2; 7292 cmapa[cp] = globidx; 7293 mptmp[cp] = PETSC_FALSE; 7294 cp++; 7295 if (mmdata->P_oth) { 7296 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7297 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7298 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7299 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7300 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7301 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7302 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7303 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7304 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7305 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7306 mp[cp]->product->api_user = product->api_user; 7307 PetscCall(MatProductSetFromOptions(mp[cp])); 7308 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7309 
mptmp[cp] = PETSC_TRUE; 7310 cp++; 7311 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7312 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7313 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7314 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7315 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7316 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7317 mp[cp]->product->api_user = product->api_user; 7318 PetscCall(MatProductSetFromOptions(mp[cp])); 7319 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7320 rmapt[cp] = 2; 7321 rmapa[cp] = globidx; 7322 cmapt[cp] = 2; 7323 cmapa[cp] = P_oth_idx; 7324 mptmp[cp] = PETSC_FALSE; 7325 cp++; 7326 } 7327 break; 7328 default: 7329 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7330 } 7331 /* sanity check */ 7332 if (size > 1) 7333 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7334 7335 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7336 for (i = 0; i < cp; i++) { 7337 mmdata->mp[i] = mp[i]; 7338 mmdata->mptmp[i] = mptmp[i]; 7339 } 7340 mmdata->cp = cp; 7341 C->product->data = mmdata; 7342 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7343 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7344 7345 /* memory type */ 7346 mmdata->mtype = PETSC_MEMTYPE_HOST; 7347 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7348 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7349 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7350 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7351 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7352 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7353 
7354 /* prepare coo coordinates for values insertion */ 7355 7356 /* count total nonzeros of those intermediate seqaij Mats 7357 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7358 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7359 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7360 */ 7361 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7362 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7363 if (mptmp[cp]) continue; 7364 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7365 const PetscInt *rmap = rmapa[cp]; 7366 const PetscInt mr = mp[cp]->rmap->n; 7367 const PetscInt rs = C->rmap->rstart; 7368 const PetscInt re = C->rmap->rend; 7369 const PetscInt *ii = mm->i; 7370 for (i = 0; i < mr; i++) { 7371 const PetscInt gr = rmap[i]; 7372 const PetscInt nz = ii[i + 1] - ii[i]; 7373 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7374 else ncoo_oown += nz; /* this row is local */ 7375 } 7376 } else ncoo_d += mm->nz; 7377 } 7378 7379 /* 7380 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7381 7382 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7383 7384 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7385 7386 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7387 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7388 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7389 7390 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7391 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 7392 */ 7393 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7394 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7395 7396 /* gather (i,j) of nonzeros inserted by remote procs */ 7397 if (hasoffproc) { 7398 PetscSF msf; 7399 PetscInt ncoo2, *coo_i2, *coo_j2; 7400 7401 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7402 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7403 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7404 7405 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7406 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7407 PetscInt *idxoff = mmdata->off[cp]; 7408 PetscInt *idxown = mmdata->own[cp]; 7409 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7410 const PetscInt *rmap = rmapa[cp]; 7411 const PetscInt *cmap = cmapa[cp]; 7412 const PetscInt *ii = mm->i; 7413 PetscInt *coi = coo_i + ncoo_o; 7414 PetscInt *coj = coo_j + ncoo_o; 7415 const PetscInt mr = mp[cp]->rmap->n; 7416 const PetscInt rs = C->rmap->rstart; 7417 const PetscInt re = C->rmap->rend; 7418 const PetscInt cs = C->cmap->rstart; 7419 for (i = 0; i < mr; i++) { 7420 const PetscInt *jj = mm->j + ii[i]; 7421 const PetscInt gr = rmap[i]; 7422 const PetscInt nz = ii[i + 1] - ii[i]; 7423 if (gr < rs || gr >= re) { /* this is an offproc row */ 7424 for (j = ii[i]; j < ii[i + 1]; j++) { 7425 *coi++ = gr; 7426 *idxoff++ = j; 7427 } 7428 if (!cmapt[cp]) { /* already global */ 7429 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7430 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7431 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7432 } else { /* offdiag */ 7433 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7434 } 7435 ncoo_o += nz; 7436 } else { /* this is a local row */ 7437 for (j = ii[i]; j < ii[i 
+ 1]; j++) *idxown++ = j; 7438 } 7439 } 7440 } 7441 mmdata->off[cp + 1] = idxoff; 7442 mmdata->own[cp + 1] = idxown; 7443 } 7444 7445 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7446 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7447 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7448 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7449 ncoo = ncoo_d + ncoo_oown + ncoo2; 7450 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7451 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7452 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7453 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7454 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7455 PetscCall(PetscFree2(coo_i, coo_j)); 7456 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7457 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7458 coo_i = coo_i2; 7459 coo_j = coo_j2; 7460 } else { /* no offproc values insertion */ 7461 ncoo = ncoo_d; 7462 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7463 7464 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7465 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7466 PetscCall(PetscSFSetUp(mmdata->sf)); 7467 } 7468 mmdata->hasoffproc = hasoffproc; 7469 7470 /* gather (i,j) of nonzeros inserted locally */ 7471 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7472 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7473 PetscInt *coi = coo_i + ncoo_d; 7474 PetscInt *coj = coo_j + ncoo_d; 7475 const PetscInt *jj = mm->j; 7476 const PetscInt *ii = mm->i; 7477 const PetscInt *cmap = cmapa[cp]; 
7478 const PetscInt *rmap = rmapa[cp]; 7479 const PetscInt mr = mp[cp]->rmap->n; 7480 const PetscInt rs = C->rmap->rstart; 7481 const PetscInt re = C->rmap->rend; 7482 const PetscInt cs = C->cmap->rstart; 7483 7484 if (mptmp[cp]) continue; 7485 if (rmapt[cp] == 1) { /* consecutive rows */ 7486 /* fill coo_i */ 7487 for (i = 0; i < mr; i++) { 7488 const PetscInt gr = i + rs; 7489 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7490 } 7491 /* fill coo_j */ 7492 if (!cmapt[cp]) { /* type-0, already global */ 7493 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7494 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7495 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7496 } else { /* type-2, local to global for sparse columns */ 7497 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7498 } 7499 ncoo_d += mm->nz; 7500 } else if (rmapt[cp] == 2) { /* sparse rows */ 7501 for (i = 0; i < mr; i++) { 7502 const PetscInt *jj = mm->j + ii[i]; 7503 const PetscInt gr = rmap[i]; 7504 const PetscInt nz = ii[i + 1] - ii[i]; 7505 if (gr >= rs && gr < re) { /* local rows */ 7506 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7507 if (!cmapt[cp]) { /* type-0, already global */ 7508 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7509 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7510 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7511 } else { /* type-2, local to global for sparse columns */ 7512 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7513 } 7514 ncoo_d += nz; 7515 } 7516 } 7517 } 7518 } 7519 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7520 PetscCall(ISDestroy(&glob)); 7521 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7522 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7523 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7524 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * 
sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7525 7526 /* preallocate with COO data */ 7527 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7528 PetscCall(PetscFree2(coo_i, coo_j)); 7529 PetscFunctionReturn(0); 7530 } 7531 7532 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7533 { 7534 Mat_Product *product = mat->product; 7535 #if defined(PETSC_HAVE_DEVICE) 7536 PetscBool match = PETSC_FALSE; 7537 PetscBool usecpu = PETSC_FALSE; 7538 #else 7539 PetscBool match = PETSC_TRUE; 7540 #endif 7541 7542 PetscFunctionBegin; 7543 MatCheckProduct(mat, 1); 7544 #if defined(PETSC_HAVE_DEVICE) 7545 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7546 if (match) { /* we can always fallback to the CPU if requested */ 7547 switch (product->type) { 7548 case MATPRODUCT_AB: 7549 if (product->api_user) { 7550 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7551 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7552 PetscOptionsEnd(); 7553 } else { 7554 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7555 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7556 PetscOptionsEnd(); 7557 } 7558 break; 7559 case MATPRODUCT_AtB: 7560 if (product->api_user) { 7561 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7562 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7563 PetscOptionsEnd(); 7564 } else { 7565 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7566 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7567 PetscOptionsEnd(); 7568 } 7569 break; 7570 case MATPRODUCT_PtAP: 7571 if (product->api_user) { 7572 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7573 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7574 PetscOptionsEnd(); 7575 } else { 7576 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7577 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7578 PetscOptionsEnd(); 7579 } 7580 break; 7581 default: 7582 break; 7583 } 7584 match = (PetscBool)!usecpu; 7585 } 7586 #endif 7587 if (match) { 7588 switch (product->type) { 7589 case MATPRODUCT_AB: 7590 case MATPRODUCT_AtB: 7591 case MATPRODUCT_PtAP: 7592 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7593 break; 7594 default: 7595 break; 7596 } 7597 } 7598 /* fallback to MPIAIJ ops */ 7599 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7600 PetscFunctionReturn(0); 7601 } 7602 7603 /* 7604 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7605 7606 n - the number of block indices in cc[] 7607 cc - the block indices (must be large enough to contain the indices) 7608 */ 7609 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7610 { 7611 PetscInt cnt = -1, nidx, j; 7612 const PetscInt *idx; 7613 7614 PetscFunctionBegin; 7615 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7616 if (nidx) { 7617 cnt = 0; 7618 cc[cnt] = idx[0] / bs; 7619 for (j = 1; j < nidx; j++) { 7620 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7621 } 7622 } 7623 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7624 
*n = cnt + 1; 7625 PetscFunctionReturn(0); 7626 } 7627 7628 /* 7629 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7630 7631 ncollapsed - the number of block indices 7632 collapsed - the block indices (must be large enough to contain the indices) 7633 */ 7634 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7635 { 7636 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7637 7638 PetscFunctionBegin; 7639 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7640 for (i = start + 1; i < start + bs; i++) { 7641 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7642 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7643 cprevtmp = cprev; 7644 cprev = merged; 7645 merged = cprevtmp; 7646 } 7647 *ncollapsed = nprev; 7648 if (collapsed) *collapsed = cprev; 7649 PetscFunctionReturn(0); 7650 } 7651 7652 /* 7653 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7654 */ 7655 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7656 { 7657 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7658 Mat tGmat; 7659 MPI_Comm comm; 7660 const PetscScalar *vals; 7661 const PetscInt *idx; 7662 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7663 MatScalar *AA; // this is checked in graph 7664 PetscBool isseqaij; 7665 Mat a, b, c; 7666 MatType jtype; 7667 7668 PetscFunctionBegin; 7669 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7670 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7671 PetscCall(MatGetType(Gmat, &jtype)); 7672 PetscCall(MatCreate(comm, &tGmat)); 7673 PetscCall(MatSetType(tGmat, jtype)); 7674 7675 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that 
compresses the entries below a threshold? 7676 Also, if the matrix is symmetric, can we skip this 7677 operation? It can be very expensive on large matrices. */ 7678 7679 // global sizes 7680 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7681 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7682 nloc = Iend - Istart; 7683 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7684 if (isseqaij) { 7685 a = Gmat; 7686 b = NULL; 7687 } else { 7688 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7689 a = d->A; 7690 b = d->B; 7691 garray = d->garray; 7692 } 7693 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7694 for (PetscInt row = 0; row < nloc; row++) { 7695 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7696 d_nnz[row] = ncols; 7697 if (ncols > maxcols) maxcols = ncols; 7698 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7699 } 7700 if (b) { 7701 for (PetscInt row = 0; row < nloc; row++) { 7702 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7703 o_nnz[row] = ncols; 7704 if (ncols > maxcols) maxcols = ncols; 7705 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7706 } 7707 } 7708 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7709 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7710 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7711 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7712 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7713 PetscCall(PetscFree2(d_nnz, o_nnz)); 7714 // 7715 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7716 nnz0 = nnz1 = 0; 7717 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7718 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7719 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7720 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7721 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7722 if (PetscRealPart(sv) > vfilter) { 7723 nnz1++; 7724 PetscInt cid = idx[jj] + Istart; //diag 7725 if (c != a) cid = 
garray[idx[jj]]; 7726 AA[ncol_row] = vals[jj]; 7727 AJ[ncol_row] = cid; 7728 ncol_row++; 7729 } 7730 } 7731 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7732 PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7733 } 7734 } 7735 PetscCall(PetscFree2(AA, AJ)); 7736 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 7737 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7738 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7739 7740 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7741 7742 *filteredG = tGmat; 7743 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7744 PetscFunctionReturn(0); 7745 } 7746 7747 /* 7748 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7749 7750 Input Parameter: 7751 . Amat - matrix 7752 - symmetrize - make the result symmetric 7753 + scale - scale with diagonal 7754 7755 Output Parameter: 7756 . 
a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block (collapsed) rows */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* fast path requires every row of a block to have the same (dense) block structure;
         if any block violates this, free and fall through to the general (old) code */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0;
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
            if (nnz[brow / bs] != jj / bs) ok = 0;
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
          Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      /* just use the provided matrix as the graph */
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a = d->A;
      b = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        {
          /* fix: nz_used is a PetscLogDouble (double); convert once to PetscInt instead of
             comparing an int against a double every iteration (and risking int overflow) */
          const PetscInt nz = (PetscInt)info.nz_used;
          for (PetscInt jj = 0; jj < nz; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        }
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ?
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7976 PetscCall(MatDestroy(&matTrans)); 7977 } 7978 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7979 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7980 if (scale) { 7981 /* scale c for all diagonal values = 1 or -1 */ 7982 Vec diag; 7983 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7984 PetscCall(MatGetDiagonal(Gmat, diag)); 7985 PetscCall(VecReciprocal(diag)); 7986 PetscCall(VecSqrtAbs(diag)); 7987 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7988 PetscCall(VecDestroy(&diag)); 7989 } 7990 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7991 7992 if (filter >= 0) { 7993 Mat Fmat = NULL; /* some silly compiler needs this */ 7994 7995 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 7996 PetscCall(MatDestroy(&Gmat)); 7997 Gmat = Fmat; 7998 } 7999 *a_Gmat = Gmat; 8000 PetscFunctionReturn(0); 8001 } 8002 8003 /* 8004 Special version for direct calls from Fortran 8005 */ 8006 #include <petsc/private/fortranimpl.h> 8007 8008 /* Change these macros so can be used in void function */ 8009 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8010 #undef PetscCall 8011 #define PetscCall(...) \ 8012 do { \ 8013 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8014 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8015 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8016 return; \ 8017 } \ 8018 } while (0) 8019 8020 #undef SETERRQ 8021 #define SETERRQ(comm, ierr, ...) 
                                                   \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix.

  All arguments arrive as pointers (Fortran pass-by-reference):
.   mmat  - the MPIAIJ matrix
.   mm/im - number of rows and (global) row indices being set
.   mn/in - number of columns and (global) column indices being set
.   v     - the values; interpreted row- or column-oriented per aij->roworiented
.   maddv - INSERT_VALUES or ADD_VALUES
.   _ierr - output error code written by the PetscCall/SETERRQ redefinitions above
            (this is a void function, so errors are reported through *_ierr and return)

  Negative row/column indices are silently skipped; off-process rows are routed to the
  stash (unless aij->donotstash) for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    /* Local ownership ranges: rows [rstart,rend) live on this process; columns
       [cstart,cend) belong to the diagonal block A, everything else to off-diagonal B */
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* These exact variable names are required by the MatSetValues_SeqAIJ_A_Private()
       and MatSetValues_SeqAIJ_B_Private() macros used below — do not rename them */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    /* Zero entries may be dropped only when adding; inserting a zero must stamp the pattern */
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed by the _A_/_B_ Private macros (binary-search bounds,
       last-column caches, insertion cursors) */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored by convention */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: reset per-row search state for both A and B */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major or column-major depending on the caller's orientation flag */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* Never drop an explicit zero on the diagonal: the pattern needs it */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column in the diagonal block: local column index into A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are ignored */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Off-diagonal block B: translate global column to B's compressed local index */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--; /* colmap stores index+1 so that 0 means "absent" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* Column not in B's current pattern and new nonzeros are allowed:
                   disassemble back to global column numbering, which replaces B */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                /* NOTE(review): ba is re-pointed at the new B's raw array (b->a) here,
                   bypassing MatSeqAIJGetArray(), yet the old ba came from Get and the
                   final Restore below uses the new B — presumably safe for host-only
                   storage; confirm against the device (CUDA/ViennaCL) code paths.
                   Also ap2 above is set from the stale ba before this reassignment. */
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* never assembled: B still uses global column numbering */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Off-process row: queue the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ