#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* Returns CSR row/column index arrays for the merged local matrix (diagonal + off-diagonal parts).
   The merged sequential matrix B is composed onto A under the key "MatGetRowIJ_MPIAIJ" so that
   MatRestoreRowIJ_MPIAIJ() can find it again; PetscObjectCompose() takes a reference, so the
   MatDestroy() below only drops this function's own reference — B stays alive while the ia/ja
   arrays are in use. */
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Releases the CSR arrays obtained via MatGetRowIJ_MPIAIJ(): recovers the composed local matrix,
   restores its ia/ja arrays, then composes NULL to drop the reference held on it. */
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
  `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. 
-mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when
   enough exist.

   Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Binds (or unbinds) the matrix to the CPU: forwards the flag to the diagonal (a->A) and
   off-diagonal (a->B) sequential blocks, and to the work vectors used in MatMult. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  /* blocks may not exist yet if called before preallocation/assembly */
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(0);
}

/* Propagates block sizes to the sequential blocks; the off-diagonal block always gets
   column block size 1 since its columns are a compressed set of global columns. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(0);
}

/* Creates an IS of the locally owned rows that contain at least one stored nonzero value
   (in either the diagonal or off-diagonal block). *keptrows is left NULL when every row
   on every rank is nonzero. Collective: contains an Allreduce, so all ranks must call. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of local rows with no (structural or numerical) nonzero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no zero rows anywhere: leave *keptrows NULL as the "all rows kept" signal */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: collect global indices of the nonzero rows */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}

/* Sets the diagonal from vector D. Fast path delegates to the diagonal block when row and
   column layouts are congruent and the matrix is assembled; otherwise falls back to the
   generic MatSetValues-based implementation. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(0);
}

/* Creates an IS (global numbering) of locally owned rows whose diagonal entry is zero or
   missing; the search only needs the diagonal block since off-diagonal entries cannot be
   on the diagonal. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* local -> global row numbers */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(0);
}

/* Computes per-column reductions (norms, sums, or means) over the whole matrix into
   reductions[0..N-1]. Each rank accumulates its local contributions into a length-N work
   array (diagonal-block columns shifted by cmap->rstart, off-diagonal columns mapped
   through garray), then the ranks combine with MPI_MAX (infinity norm) or MPI_SUM.
   Collective. Note: the work array is O(N) per rank, so this is not memory-scalable. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  PetscInt i, m, n, *garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore with no use in between: presumably forces any device-side values to be
     synced to the host arrays a_aij->a / b_aij->a that are read below — TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); /* summed squares -> 2-norm */
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* sum -> mean over global row count */
  }
  PetscFunctionReturn(0);
}

/* Creates an IS (global numbering) of local rows having entries outside the block diagonal:
   the union of off-block-diagonal rows of the diagonal block and all nonzero rows of the
   off-diagonal block, sorted with duplicates removed. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  IS sis, gis;
  const PetscInt *isis, *igis;
  PetscInt n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both index lists, then sort and deduplicate in place */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local -> global row numbers */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
 */
/* Builds aij->colmap: global column id -> (local off-diagonal column + 1), with 0 meaning
   "not present". With PETSC_USE_CTABLE the map is a hash table (keys/values stored +1 so
   that 0 stays the "missing" sentinel); otherwise it is a dense O(N) array. Requires
   garray to be set (i.e. the matrix must have been assembled at least once). */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}

/* Inserts/adds one value into the diagonal block. NOT a hygienic macro: it reads and writes
   the caller's locals (rp1, ap1, low1, high1, nrow1, lastcol1, rmax1, _i, t, N, aa, ai, aj,
   aimax, ailen, nonew, ignorezeroentries, a, A, am) set up in MatSetValues_MPIAIJ(). It does a
   bounded binary then linear search of the row, updates in place when the column exists, and
   otherwise reallocates (per the nonew policy) and shifts the row tail to make room. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow dow the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert:; \
    ailen[row] = nrow1; \
  }

/* Off-diagonal-block twin of MatSetValues_SeqAIJ_A_Private (uses the *2/b-prefixed caller
   locals). Differs in the zero-skip test: no row != col exclusion, since a diagonal entry
   can never live in the off-diagonal block. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert:; \
    bilen[row] = nrow2; \
  }

/* Overwrites the values of one locally owned row from a dense-by-row buffer v laid out in
   global column order: [off-diag part left of the diagonal block | diagonal block | off-diag
   part right of it]. Row is given in global numbering; the existing nonzero pattern is reused
   (no insertion). */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* global -> local row */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* l = #off-diag entries with global col < rstart */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}

/* MatSetValues implementation for MPIAIJ. Locally owned rows are routed to the diagonal
   block (columns in [cstart,cend)) or the off-diagonal block via the private macros above;
   off-process rows are stashed for communication during assembly. If a new off-diagonal
   column appears after the matrix was assembled, B is "disassembled" back to global column
   ids and the macro working set is reloaded. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar *aa, *ba;
  PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt nonew;
  MatScalar *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: load the macro working set for both blocks */
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
530 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 531 */ 532 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) { 533 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 534 Mat A = aij->A; /* diagonal part of the matrix */ 535 Mat B = aij->B; /* offdiagonal part of the matrix */ 536 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 537 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 538 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 539 PetscInt *ailen = a->ilen, *aj = a->j; 540 PetscInt *bilen = b->ilen, *bj = b->j; 541 PetscInt am = aij->A->rmap->n, j; 542 PetscInt diag_so_far = 0, dnz; 543 PetscInt offd_so_far = 0, onz; 544 545 PetscFunctionBegin; 546 /* Iterate over all rows of the matrix */ 547 for (j = 0; j < am; j++) { 548 dnz = onz = 0; 549 /* Iterate over all non-zero columns of the current row */ 550 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 551 /* If column is in the diagonal */ 552 if (mat_j[col] >= cstart && mat_j[col] < cend) { 553 aj[diag_so_far++] = mat_j[col] - cstart; 554 dnz++; 555 } else { /* off-diagonal entries */ 556 bj[offd_so_far++] = mat_j[col]; 557 onz++; 558 } 559 } 560 ailen[j] = dnz; 561 bilen[j] = onz; 562 } 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 568 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 569 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 570 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 571 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
 */
/* Copies CSR values (and column indices) row-by-row into the already-sized diagonal and
   off-diagonal blocks. Unlike the Symbolic variant, row starts come from the blocks' full
   i arrays (which may also account for not-yet-received off-process entries). Requires
   mat->was_assembled == PETSC_FALSE so that B still uses global column ids. */
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* MatGetValues for MPIAIJ: only locally owned rows may be queried. Each requested entry is
   fetched from the diagonal block (local columns) or, via the colmap (global -> local
   off-diagonal column), from the off-diagonal block; absent off-diagonal columns yield 0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* garray check guards against stale colmap entries after pattern changes */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Starts communication of the stashed off-process entries accumulated by MatSetValues.
   A no-op when stashing is disabled. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/* Finishes assembly: drains the stash (inserting received off-process entries locally),
   assembles both sequential blocks, synchronizes disassembly state across ranks, builds the
   scatter machinery on first final assembly, and updates the global nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt n;
  PetscInt i, j, rstart, ncols, flg;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag)); /* cached diagonal is invalid after new values */

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zeros all stored values in both blocks; the nonzero pattern is kept. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) {
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Zeros the given global rows, optionally placing `diag` on the diagonal and fixing the
   right-hand side b so that x keeps the requested solution values in those rows. Rows may
   be specified on any rank; they are first mapped to their owners. Collective. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* record block states so we can detect pattern changes afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists past the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original insertion policy */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt n = A->rmap->n;
  PetscInt i, j, r, m, len = 0;
  PetscInt *lrows, *owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb, *mask, *aij_a;
  Vec xmask, lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt *aj, *ii, *ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank = p;
rrows[r].index = rows[r] - owners[p]; 848 } 849 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 850 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 851 /* Collect flags for rows to be zeroed */ 852 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 853 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 854 PetscCall(PetscSFDestroy(&sf)); 855 /* Compress and put in row numbers */ 856 for (r = 0; r < n; ++r) 857 if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 860 /* handle off diagonal part of matrix */ 861 PetscCall(MatCreateVecs(A, &xmask, NULL)); 862 PetscCall(VecDuplicate(l->lvec, &lmask)); 863 PetscCall(VecGetArray(xmask, &bb)); 864 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 865 PetscCall(VecRestoreArray(xmask, &bb)); 866 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 867 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 868 PetscCall(VecDestroy(&xmask)); 869 if (x && b) { /* this code is buggy when the row and column layout don't match */ 870 PetscBool cong; 871 872 PetscCall(MatHasCongruentLayouts(A, &cong)); 873 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 874 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 875 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 876 PetscCall(VecGetArrayRead(l->lvec, &xx)); 877 PetscCall(VecGetArray(b, &bb)); 878 } 879 PetscCall(VecGetArray(lmask, &mask)); 880 /* remove zeroed rows of off diagonal matrix */ 881 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 882 ii = aij->i; 883 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); 884 /* loop over all elements of off process part of matrix zeroing 
removed columns*/ 885 if (aij->compressedrow.use) { 886 m = aij->compressedrow.nrows; 887 ii = aij->compressedrow.i; 888 ridx = aij->compressedrow.rindex; 889 for (i = 0; i < m; i++) { 890 n = ii[i + 1] - ii[i]; 891 aj = aij->j + ii[i]; 892 aa = aij_a + ii[i]; 893 894 for (j = 0; j < n; j++) { 895 if (PetscAbsScalar(mask[*aj])) { 896 if (b) bb[*ridx] -= *aa * xx[*aj]; 897 *aa = 0.0; 898 } 899 aa++; 900 aj++; 901 } 902 ridx++; 903 } 904 } else { /* do not use compressed row format */ 905 m = l->B->rmap->n; 906 for (i = 0; i < m; i++) { 907 n = ii[i + 1] - ii[i]; 908 aj = aij->j + ii[i]; 909 aa = aij_a + ii[i]; 910 for (j = 0; j < n; j++) { 911 if (PetscAbsScalar(mask[*aj])) { 912 if (b) bb[i] -= *aa * xx[*aj]; 913 *aa = 0.0; 914 } 915 aa++; 916 aj++; 917 } 918 } 919 } 920 if (x && b) { 921 PetscCall(VecRestoreArray(b, &bb)); 922 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 923 } 924 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 925 PetscCall(VecRestoreArray(lmask, &mask)); 926 PetscCall(VecDestroy(&lmask)); 927 PetscCall(PetscFree(lrows)); 928 929 /* only change matrix nonzero state if pattern was allowed to be changed */ 930 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 931 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 932 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 933 } 934 PetscFunctionReturn(0); 935 } 936 937 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) { 938 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 939 PetscInt nt; 940 VecScatter Mvctx = a->Mvctx; 941 942 PetscFunctionBegin; 943 PetscCall(VecGetLocalSize(xx, &nt)); 944 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 945 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 946 PetscUseTypeMethod(a->A, mult, xx, yy); 947 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, 
INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); /* add the off-diagonal contribution */
  PetscFunctionReturn(0);
}

/* Apply only the diagonal block: forwarded to the sequential diagonal matrix */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(0);
}

/* z = y + A*x, overlapping the ghost scatter with the diagonal-block multadd */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}

/* y = A^T x: both local transpose products, then a reverse scatter-add to merge
   the off-diagonal contributions into their owning ranks */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals Amat^T (within tol). Cheap diagonal-block test first,
   then, if that passes, compares the off-process parts via submatrix extraction. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) {
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global columns NOT owned by this rank */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

/* A is symmetric iff A equals its own transpose */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) {
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(0);
}

/* z = y + A^T x, analogous to MatMultTranspose_MPIAIJ */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

/* Scale both sequential blocks by aa */
PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy all resources owned by an MPIAIJ matrix and unregister its composed methods */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,
"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* clear every composed method so the object cannot dispatch into freed code */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here (also above) — harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(0);
}

/* Write the parallel matrix to a binary viewer: header, per-row lengths, column
   indices, then values, with each row's entries emitted in global column order
   (off-diagonal entries left of the diagonal block, diagonal block, remainder). */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* rank 0 receives the global nonzero count; other ranks keep the local value (unused) */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diagonal columns to the left of the diagonal block come first */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1214 PetscCall(PetscFree(colidxs)); 1215 1216 /* fill in and store nonzero values */ 1217 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1218 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1219 PetscCall(PetscMalloc1(nz, &matvals)); 1220 for (cnt = 0, i = 0; i < m; i++) { 1221 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1222 if (garray[B->j[jb]] > cs) break; 1223 matvals[cnt++] = ba[jb]; 1224 } 1225 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1226 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1227 } 1228 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1229 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1230 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1231 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1232 PetscCall(PetscFree(matvals)); 1233 1234 /* write block size option to the viewer's .info file */ 1235 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #include <petscdraw.h> 1240 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 1241 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1242 PetscMPIInt rank = aij->rank, size = aij->size; 1243 PetscBool isdraw, iascii, isbinary; 1244 PetscViewer sviewer; 1245 PetscViewerFormat format; 1246 1247 PetscFunctionBegin; 1248 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1249 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1250 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1251 if (iascii) { 1252 PetscCall(PetscViewerGetFormat(viewer, &format)); 1253 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1254 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, 
*nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      /* gather per-rank nonzero counts and report min/avg/max as a load-balance summary */
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized report of local sizes, nonzeros and inode usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView for MPIAIJ: identifies the viewer type and forwards */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary
|| issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(0);
}

/* SOR/Eisenstat relaxation for MPIAIJ. Only the "local" sweep variants are supported in
   parallel: each iteration scatters the current ghost values, forms bb1 = bb - B*x for
   the off-process coupling, then runs the requested sweep on the diagonal block. */
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is only needed when at least one rhs update bb1 = bb - B*x will be formed */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal of the matrix; reused on subsequent calls */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Produce B = P_row * A * P_col for the given row/column permutation index sets,
   using PetscSFs to invert the permutations and compute the new ownership */
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) {
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
PetscCall(MatGetSize(aB, NULL, &ng)); 1511 PetscCall(PetscMalloc1(ng, &gcdest)); 1512 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1513 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1514 PetscCall(PetscSFSetFromOptions(sf)); 1515 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1516 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1517 PetscCall(PetscSFDestroy(&sf)); 1518 1519 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1520 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1521 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1522 for (i = 0; i < m; i++) { 1523 PetscInt row = rdest[i]; 1524 PetscMPIInt rowner; 1525 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1526 for (j = ai[i]; j < ai[i + 1]; j++) { 1527 PetscInt col = cdest[aj[j]]; 1528 PetscMPIInt cowner; 1529 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1530 if (rowner == cowner) dnnz[i]++; 1531 else onnz[i]++; 1532 } 1533 for (j = bi[i]; j < bi[i + 1]; j++) { 1534 PetscInt col = gcdest[bj[j]]; 1535 PetscMPIInt cowner; 1536 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1537 if (rowner == cowner) dnnz[i]++; 1538 else onnz[i]++; 1539 } 1540 } 1541 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1542 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1543 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1544 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1545 PetscCall(PetscSFDestroy(&rowsf)); 1546 1547 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1548 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1549 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1550 for (i = 0; i < 
m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* MatGetGhosts_MPIAIJ - returns the number of ghost (off-process) columns of the
   off-diagonal block B and, optionally, their global column numbers (garray). */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/* MatGetInfo_MPIAIJ - collects matrix statistics (nonzeros, memory, mallocs) from the
   diagonal (A) and off-diagonal (B) blocks and combines them locally or across ranks. */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));
isend[0] = info->nz_used; /* stage the diagonal-block stats */
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* add in the off-diagonal-block stats */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* MatSetOption_MPIAIJ - dispatches a MatOption either to both sequential blocks (A and B),
   to fields of the MPIAIJ struct, or ignores/rejects it as appropriate. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* These options affect the nonzero pattern handling of the two sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS: A->submat_singleis = flg; break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/* MatGetRow_MPIAIJ - returns one locally owned row with globally numbered, sorted columns,
   merging the diagonal block (A, local columns offset by cstart) with the off-diagonal
   block (B, columns mapped through garray). Only rows in [rstart, rend) are valid. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* NULL out the value/index request pointers the caller did not ask for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come before all A entries */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* MatRestoreRow_MPIAIJ - clears the "row in use" flag set by MatGetRow_MPIAIJ. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* MatNorm_MPIAIJ - computes the Frobenius, 1- (max column sum), or infinity- (max row sum)
   norm from the two sequential blocks, reducing partial results across the communicator. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 locally over both blocks, then Allreduce and take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* accumulate |a_ij| into a full-length (global N) column-sum array */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
}
      /* sum the per-column contributions over all ranks, then take the maximum */
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}

/* MatTranspose_MPIAIJ - transposes a parallel AIJ matrix. The diagonal block is transposed
   locally; the off-diagonal block's entries are shipped to their owners via MatSetValues.
   Preallocation counts are computed with a PetscSF reduction over the column layout. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* each source row becomes a column of the transpose; MatSetValues routes off-process entries */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

/* MatDiagonalScale_MPIAIJ - computes diag(ll) * mat * diag(rr). The right scaling of the
   off-diagonal block needs the ghosted entries of rr, obtained via the Mvctx scatter. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}

/* MatSetUnfactored_MPIAIJ - marks the diagonal block as not factored. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* MatEqual_MPIAIJ - two MPIAIJ matrices are equal iff both their diagonal and off-diagonal
   blocks are equal on every rank (logical AND across the communicator). */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* MatCopy_MPIAIJ - copies A into B, block-by-block when the nonzero patterns match and
   both matrices share the same copy implementation; otherwise falls back to MatCopy_Basic. */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* MatSetUp_MPIAIJ - default setup: preallocate with PETSC_DEFAULT row lengths. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: a sorted-merge count of the global
     column indices of row i of X and row i of Y, counting shared columns once */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/* MatAXPY_MPIAIJ - Y += a*X. Fast path for SAME_NONZERO_PATTERN (blockwise AXPY), basic path
   for SUBSET_NONZERO_PATTERN, otherwise builds a freshly preallocated union matrix. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2078 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2079 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2080 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2081 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2082 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2083 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2084 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2085 PetscCall(MatHeaderMerge(Y, &B)); 2086 PetscCall(PetscFree(nnz_d)); 2087 PetscCall(PetscFree(nnz_o)); 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2093 2094 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) { 2095 PetscFunctionBegin; 2096 if (PetscDefined(USE_COMPLEX)) { 2097 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2098 2099 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2100 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatRealPart_MPIAIJ(Mat A) { 2106 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2107 2108 PetscFunctionBegin; 2109 PetscCall(MatRealPart(a->A)); 2110 PetscCall(MatRealPart(a->B)); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) { 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2116 2117 PetscFunctionBegin; 2118 PetscCall(MatImaginaryPart(a->A)); 2119 PetscCall(MatImaginaryPart(a->B)); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) { 2124 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2125 PetscInt i, *idxb = NULL, m = A->rmap->n; 2126 PetscScalar *va, *vv; 2127 Vec vB, vA; 2128 const PetscScalar *vb; 2129 2130 PetscFunctionBegin; 2131 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2132 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2133 2134 PetscCall(VecGetArrayWrite(vA, &va)); 2135 if (idx) { 2136 for (i = 0; i 
< m; i++) { 2137 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2138 } 2139 } 2140 2141 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2142 PetscCall(PetscMalloc1(m, &idxb)); 2143 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2144 2145 PetscCall(VecGetArrayWrite(v, &vv)); 2146 PetscCall(VecGetArrayRead(vB, &vb)); 2147 for (i = 0; i < m; i++) { 2148 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2149 vv[i] = vb[i]; 2150 if (idx) idx[i] = a->garray[idxb[i]]; 2151 } else { 2152 vv[i] = va[i]; 2153 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2154 } 2155 } 2156 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2157 PetscCall(VecRestoreArrayWrite(vA, &va)); 2158 PetscCall(VecRestoreArrayRead(vB, &vb)); 2159 PetscCall(PetscFree(idxb)); 2160 PetscCall(VecDestroy(&vA)); 2161 PetscCall(VecDestroy(&vB)); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) { 2166 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2167 PetscInt m = A->rmap->n, n = A->cmap->n; 2168 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2169 PetscInt *cmap = mat->garray; 2170 PetscInt *diagIdx, *offdiagIdx; 2171 Vec diagV, offdiagV; 2172 PetscScalar *a, *diagA, *offdiagA; 2173 const PetscScalar *ba, *bav; 2174 PetscInt r, j, col, ncols, *bi, *bj; 2175 Mat B = mat->B; 2176 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2177 2178 PetscFunctionBegin; 2179 /* When a process holds entire A and other processes have no entry */ 2180 if (A->cmap->N == n) { 2181 PetscCall(VecGetArrayWrite(v, &diagA)); 2182 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2183 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2184 PetscCall(VecDestroy(&diagV)); 2185 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2186 PetscFunctionReturn(0); 2187 } else if (n == 0) { 2188 if (m) { 2189 PetscCall(VecGetArrayWrite(v, &a)); 2190 for (r = 0; r < m; r++) { 2191 a[r] = 
0.0; 2192 if (idx) idx[r] = -1; 2193 } 2194 PetscCall(VecRestoreArrayWrite(v, &a)); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2200 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2201 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2202 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2203 2204 /* Get offdiagIdx[] for implicit 0.0 */ 2205 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2206 ba = bav; 2207 bi = b->i; 2208 bj = b->j; 2209 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2210 for (r = 0; r < m; r++) { 2211 ncols = bi[r + 1] - bi[r]; 2212 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2213 offdiagA[r] = *ba; 2214 offdiagIdx[r] = cmap[0]; 2215 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2216 offdiagA[r] = 0.0; 2217 2218 /* Find first hole in the cmap */ 2219 for (j = 0; j < ncols; j++) { 2220 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2221 if (col > j && j < cstart) { 2222 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2223 break; 2224 } else if (col > j + n && j >= cstart) { 2225 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2226 break; 2227 } 2228 } 2229 if (j == ncols && ncols < A->cmap->N - n) { 2230 /* a hole is outside compressed Bcols */ 2231 if (ncols == 0) { 2232 if (cstart) { 2233 offdiagIdx[r] = 0; 2234 } else offdiagIdx[r] = cend; 2235 } else { /* ncols > 0 */ 2236 offdiagIdx[r] = cmap[ncols - 1] + 1; 2237 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2238 } 2239 } 2240 } 2241 2242 for (j = 0; j < ncols; j++) { 2243 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2244 offdiagA[r] = *ba; 2245 offdiagIdx[r] = cmap[*bj]; 2246 } 2247 ba++; 2248 bj++; 2249 } 2250 } 2251 2252 PetscCall(VecGetArrayWrite(v, &a)); 2253 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2254 for (r = 0; r < m; ++r) { 2255 if (PetscAbsScalar(diagA[r]) < 
PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* equal magnitudes: prefer the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* MatGetRowMin_MPIAIJ - per local row, the minimum entry (compared via PetscRealPart) and
   optionally its global column; implicit zeros of the compressed off-diagonal block count. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local entries at all: report PETSC_MAX_REAL (identity of min) and index -1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row has an implicit 0.0 entry */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* ties go to the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* MatGetRowMax_MPIAIJ - per local row, the maximum entry (compared via PetscRealPart) and
   optionally its global column; implicit zeros of the compressed off-diagonal block count. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local entries at all: report PETSC_MIN_REAL (identity of max) and index -1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block maxima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2492 idx[r] = cstart + diagIdx[r]; 2493 } else idx[r] = offdiagIdx[r]; 2494 } 2495 } else { 2496 a[r] = offdiagA[r]; 2497 if (idx) idx[r] = offdiagIdx[r]; 2498 } 2499 } 2500 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2501 PetscCall(VecRestoreArrayWrite(v, &a)); 2502 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2503 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2504 PetscCall(VecDestroy(&diagV)); 2505 PetscCall(VecDestroy(&offdiagV)); 2506 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) { 2511 Mat *dummy; 2512 2513 PetscFunctionBegin; 2514 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2515 *newmat = *dummy; 2516 PetscCall(PetscFree(dummy)); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) { 2521 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2522 2523 PetscFunctionBegin; 2524 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2525 A->factorerrortype = a->A->factorerrortype; 2526 PetscFunctionReturn(0); 2527 } 2528 2529 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) { 2530 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2531 2532 PetscFunctionBegin; 2533 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2534 PetscCall(MatSetRandom(aij->A, rctx)); 2535 if (x->assembled) { 2536 PetscCall(MatSetRandom(aij->B, rctx)); 2537 } else { 2538 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2539 } 2540 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2541 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode 
MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  /* swap the increase-overlap implementation installed in the method table */
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

   Not collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  nz - the number of nonzeros

   Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) {
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* total local nonzeros = last row-offset of the diagonal block + last row-offset of the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on A

   Input Parameters:
+  A - the matrix
-  sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if one is registered; silently a no-op otherwise */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}

/* Process run-time options for MATMPIAIJ matrices */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) {
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default of the option reflects the implementation currently installed on A */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Add a*I to Y. If Y is not yet preallocated, or its diagonal block has no stored
   nonzeros, first install a minimal one-nonzero-per-row preallocation so that
   MatShift_Basic() has room to insert the diagonal entries. */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) {
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the new-nonzero policy that preallocation resets */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

/* Determine whether any diagonal entry is absent; the search happens in the local
   diagonal block, and the reported location d (if requested) is shifted into a
   global row number. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(0);
}

/* Invert the variable-sized diagonal blocks; they lie entirely in the local
   diagonal part a->A, so the work is purely sequential. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Method (function-pointer) table for MATMPIAIJ; the numbered comments mark the
   slot index within struct _MatOps, so entries must stay in exactly this order. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                       /*150*/ NULL};

/* ----------------------------------------------------------------------------------------*/

/* Stash a copy of the current numerical values of both local blocks; they can be
   brought back later with MatRetrieveValues_MPIAIJ(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the numerical values of both local blocks previously saved by
   MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocate the diagonal (b->A) and off-diagonal (b->B) sequential blocks.
   d_nz/d_nnz describe the diagonal block, o_nz/o_nnz the off-diagonal block
   (uniform count or per-row array). Any previous column map, ghost array and
   communication machinery is discarded since the nonzero pattern changes. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single process there is no off-diagonal part, so give B zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset the preallocation of both local blocks so the matrix can be refilled;
   the column map, ghost array and scatter are discarded and will be rebuilt at
   the next assembly. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
  /* Duplicate a MATMPIAIJ matrix: copies layout, options, the column map/ghost
     arrays, and both local blocks (values copied or not according to cpvalues). */
  Mat mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-call MatGetRow() scratch state is never shared with the duplicate */
  a->rowindices = NULL;
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 so the allocation is non-NULL even for len == 0 */
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a MATMPIAIJ matrix from a viewer; dispatches on the viewer type
   (binary or HDF5 when available). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) {
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load a MATMPIAIJ matrix from a PETSc binary viewer: read the header, distribute
   row lengths, read each rank's column indices and values collectively, and hand
   the resulting CSR triplet to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M = header[1];
  N = header[2];
  nz = header[3]; /* nz < 0 marks a special on-disk format that cannot be loaded here */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* prefix-sum the per-row counts into CSR row offsets */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the row lengths must account for every stored nonzero */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) {
  IS iscol_local;
  PetscBool isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree before taking the optimized path */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow
- parallel row index set; its local indices are a subset of the local rows of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameter:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) {
  Vec x, cmap;
  const PetscInt *is_idx;
  PetscScalar *xarray, *cmaparray;
  PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data;
  Mat B = a->B;
  Vec lvec = a->lvec, lcmap;
  PetscInt i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm comm;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; /* mark column as selected */
    cmaparray[is_idx[i] - cstart] = i + isstart;         /* global index of iscol[i] */
    idx[i] = is_idx[i] - cstart;                         /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* global row -> local row */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    /* > -1.0 means this ghost column was marked as selected in step (1) */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i;                                       /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller owns and must PetscFree() this array */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat M = NULL;
  MPI_Comm comm;
  IS iscol_d, isrow_d, iscol_o;
  Mat Asub = NULL, Bsub = NULL;
  PetscInt n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* merge-walk the two sorted ghost-column maps to keep only columns present in the condensed Bsub */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request;
       composing transfers a reference, so the local references are dropped */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Extract a parallel submatrix mat[isrow, iscol]. Chooses among three strategies:
   the same-row-and-column-distribution fast path, the same-row-distribution path,
   and the general (non-scalable, gathers iscol on every rank) fallback. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS iscol_local = NULL, isrow_d;
  PetscInt csize;
  PetscInt n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed ISs left behind by the earlier MAT_INITIAL_MATRIX call identify which path built *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the fast paths are collective: every rank must satisfy the distribution test */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the submatrix for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced

   Notes:
   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

   A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) {
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col; /* borrow B's CSR index arrays; ownership moves to Bnew below */
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  /* A and B must have the same local row count since they are the two halves of the same row block */
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of each rank's local ("diagonal") column count */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark preallocated so assembly below does not try to allocate storage */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat (ownership of A transfers to *mat) */
  maij->A = A;

  /* Translate B's local column indices to global indices, in place, via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Strip array ownership from B so destroying it does not free the arrays now shared by Bnew */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* Hand array ownership to Bnew instead */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; assembly compacts the off-diagonal column space */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) {
  /* Extract the submatrix mat[isrow, iscol] when isrow has the same row distribution as mat.
     Cached objects ("SubMatrix", "SubIScol", "Subcmap") are composed on *newmat so that a
     MAT_REUSE_MATRIX call can skip the symbolic phase. */
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the cached sequential submatrix and index sets from the previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must hold on every rank; reduce with logical AND (collective) */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: merge against the sorted garray of off-process columns */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's [rstart, rend) column range (collective) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation holds both dlens and olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat, mapping Msub's local columns to global via cmap */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub)); /* compose took a reference; drop ours */

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Reuse the sequential submatrix cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; the first (n % size) ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart, rend) column range (collective) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* one allocation holds dlens followed by olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj; /* column indices for this row */
    jj += nz;
    vwork = aa; /* values for this row */
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* compose took a reference; drop ours */
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  /* Type-specific implementation of MatMPIAIJSetPreallocationCSR(): preallocates B from the
     local CSR triple (Ii, J, v) and inserts the values. */
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* In debug builds, validate each row's column indices before using them */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      /* Checks of the first/last entry assume column indices within a row are sorted */
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* Count diagonal-block vs off-diagonal entries per row for exact preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* Insert the values one (global) row at a time; v may be NULL to set only the structure */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* All inserted entries are locally owned, so suppress the off-process communication phase */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++; /* leading entries with column < cstart lie left of the diagonal block */
    ld[i] = j;
    J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering..
i.e for the following matrix, the input data expected is 3879 as shown 3880 3881 $ 1 0 0 3882 $ 2 0 3 P0 3883 $ ------- 3884 $ 4 5 6 P1 3885 $ 3886 $ Process0 [P0]: rows_owned=[0,1] 3887 $ i = {0,1,3} [size = nrow+1 = 2+1] 3888 $ j = {0,0,2} [size = 3] 3889 $ v = {1,2,3} [size = 3] 3890 $ 3891 $ Process1 [P1]: rows_owned=[2] 3892 $ i = {0,3} [size = nrow+1 = 1+1] 3893 $ j = {0,1,2} [size = 3] 3894 $ v = {4,5,6} [size = 3] 3895 3896 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3897 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3898 @*/ 3899 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 3900 PetscFunctionBegin; 3901 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 3902 PetscFunctionReturn(0); 3903 } 3904 3905 /*@C 3906 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 3907 (the default parallel PETSc format). For good matrix assembly performance 3908 the user should preallocate the matrix storage by setting the parameters 3909 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3910 performance can be increased by more than a factor of 50. 3911 3912 Collective 3913 3914 Input Parameters: 3915 + B - the matrix 3916 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3917 (same value is used for all local rows) 3918 . d_nnz - array containing the number of nonzeros in the various rows of the 3919 DIAGONAL portion of the local submatrix (possibly different for each row) 3920 or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure. 3921 The size of this array is equal to the number of local rows, i.e 'm'. 
   For matrices that will be factored, you must leave room for (and set)
   the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See [Sparse Matrices](sec_matsparse) for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  /* Validate the matrix object, then dispatch to the type-specific implementation; a no-op for non-MPIAIJ types */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - optional matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering.. i.e for the following matrix, the input data expected is
   as shown

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  /* i may be NULL; a non-NULL i must start at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* the CSR arrays are copied into the matrix; the caller keeps ownership of i, j, a */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
   CSR format for the local rows.
  Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J - column indices
- v - matrix values

  Level: intermediate

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;    /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld;  /* per row: number of off-diagonal entries left of the diagonal block */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* Each CSR row of v is split as [left off-diag | diagonal block | right off-diag];
       ld[i] left entries and nnz-ldi-md right entries go to B (ao), md middle entries to A (ad) */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* values were written in place, so assembly needs no communication */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@
  MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v - matrix values, stored by row

  Level: intermediate

  Note:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) {
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  /* NOTE(review): despite its name, Adj holds the OFF-diagonal (Ao) row offsets — consider renaming */
  const PetscInt *Adi = Ad->i, *Adj = Ao->i;
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0;
  for (i = 0; i < m; i++) {
    /* row nnz is reconstructed from the stored sparsity; same split-copy scheme as MatUpdateMPIAIJWithArrays() */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or NULL, if d_nz is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL, if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Output Parameter:
. A - the matrix

  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  m,n,M,N parameters specify the size of the matrix, and its partitioning across
  processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor than it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2 etc.. where
  m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitute the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

  When calling this routine with a single process communicator, a matrix of
  type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$    MatCreate(...,&A);
$    MatSetType(A,MATMPIAIJ);
$    MatSetSizes(A, m,n,M,N);
$    MatMPIAIJSetPreallocation(A,...);

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Options Database Keys:
+ -mat_no_inode - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
        See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another SeqAIJ matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    /* single-process communicator: a MATSEQAIJ is returned; the off-diagonal arguments are not used */
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(0);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

  Not collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - The local diagonal block as a `MATSEQAIJ` matrix
.
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

  Note:
  The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.

  Level: intermediate

.seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  /* prefix match accepts MATMPIAIJ and its derived types (e.g. "mpiaijcusparse") */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray; /* borrowed references/arrays; caller must not free them */
  PetscFunctionReturn(0);
}

/* Builds (or refills, for scall == MAT_REUSE_MATRIX) a parallel matrix on comm whose local rows
   are the rows of the per-process sequential matrix inmat; row offsets come from an MPI_Scan of m */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m; /* exclusive prefix sum: first global row owned by this rank */

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are called; only the one matching the created type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Writes each rank's local rows (all N columns) of A to its own binary file "<outfile>.<rank>" */
PetscErrorCode MatFileSplit(Mat A, char *outfile) {
  PetscMPIInt        rank;
  PetscInt           m, N, i, rstart, nnz;
  size_t             len;
  const PetscInt    *indx;
  PetscViewer        out;
  char              *name;
  Mat                B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCall(MatGetSize(A, NULL, &N));
  /* Should this be the type of the diagonal block of A?
*/
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, m, N, m, N));
  PetscCall(MatSetBlockSizesFromMats(B, A, A));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
    PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
  PetscCall(PetscStrlen(outfile, &len));
  /* NOTE(review): len+6 leaves room for ".%d" only up to 4-digit ranks plus NUL — confirm larger ranks are acceptable (output name would be truncated) */
  PetscCall(PetscMalloc1(len + 6, &name));
  PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B, out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Container destructor for Mat_Merge_SeqsToMPI state attached by MatCreateMPIAIJSumSeqAIJSymbolic() */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) {
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  /* buf_ri/buf_rj were allocated as one contiguous buffer anchored at index 0, plus the pointer arrays */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of merging per-process SeqAIJ matrices into the MPIAIJ matrix created by
   MatCreateMPIAIJSumSeqAIJSymbolic(): exchanges the off-process values and accumulates them
   with the local ones into mpimat */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) {
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge state stashed on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    /* values for the rows owned by proc are contiguous in seqmat's CSR array */
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge sorted aj into sorted bj_i: advance j until columns match, then accumulate */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
4681 nextai[k]++; 4682 } 4683 } 4684 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4685 } 4686 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4687 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4688 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4689 4690 PetscCall(PetscFree(abuf_r[0])); 4691 PetscCall(PetscFree(abuf_r)); 4692 PetscCall(PetscFree(ba_i)); 4693 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4694 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4695 PetscFunctionReturn(0); 4696 } 4697 4698 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) { 4699 Mat B_mpi; 4700 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4701 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4702 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4703 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4704 PetscInt len, proc, *dnz, *onz, bs, cbs; 4705 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4706 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4707 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4708 MPI_Status *status; 4709 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4710 PetscBT lnkbt; 4711 Mat_Merge_SeqsToMPI *merge; 4712 PetscContainer container; 4713 4714 PetscFunctionBegin; 4715 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4716 4717 /* make sure it is a PETSc comm */ 4718 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4719 PetscCallMPI(MPI_Comm_size(comm, &size)); 4720 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4721 4722 PetscCall(PetscNew(&merge)); 4723 PetscCall(PetscMalloc1(size, &status)); 4724 4725 /* determine row ownership */ 4726 /*---------------------------------------------------------*/ 4727 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4728 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4729 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4730 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4731 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4732 PetscCall(PetscMalloc1(size, &len_si)); 4733 PetscCall(PetscMalloc1(size, &merge->len_s)); 4734 4735 m = merge->rowmap->n; 4736 owners = merge->rowmap->range; 4737 4738 /* determine the number of messages to send, their lengths */ 4739 /*---------------------------------------------------------*/ 4740 len_s = merge->len_s; 4741 4742 len = 0; /* length of buf_si[] */ 4743 merge->nsend = 0; 4744 for (proc = 0; proc < size; proc++) { 4745 len_si[proc] = 0; 4746 if (proc == rank) { 4747 len_s[proc] = 0; 4748 } else { 4749 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4750 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4751 } 4752 if (len_s[proc]) { 4753 merge->nsend++; 4754 nrows = 0; 4755 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4756 if (ai[i + 1] > ai[i]) nrows++; 4757 } 4758 len_si[proc] = 2 * (nrows + 1); 4759 len += len_si[proc]; 4760 } 4761 } 4762 4763 /* determine the number and length of messages to receive for ij-structure */ 4764 /*-------------------------------------------------------------------------*/ 4765 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4766 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4767 4768 /* post the Irecv of j-structure */ 4769 /*-------------------------------*/ 4770 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4771 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4772 4773 /* post the Isend of j-structure */ 4774 /*--------------------------------*/ 4775 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4776 4777 for (proc = 0, k = 0; proc < size; proc++) { 4778 if (!len_s[proc]) continue; 4779 i = owners[proc]; 4780 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4781 k++; 4782 } 4783 4784 /* receives and sends of j-structure are complete */ 4785 /*------------------------------------------------*/ 4786 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4787 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4788 4789 /* send and recv i-structure */ 4790 /*---------------------------*/ 4791 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4792 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4793 4794 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4795 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4796 for (proc = 0, k = 0; proc < size; proc++) { 4797 if (!len_s[proc]) continue; 4798 /* form outgoing message for i-structure: 4799 buf_si[0]: nrows to be sent 4800 [1:nrows]: row index (global) 4801 [nrows+1:2*nrows+1]: i-structure index 4802 */ 4803 /*-------------------------------------------*/ 4804 nrows = len_si[proc] / 2 - 1; 4805 buf_si_i = buf_si + nrows + 1; 4806 buf_si[0] = nrows; 4807 buf_si_i[0] = 0; 4808 nrows = 0; 4809 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4810 anzi = ai[i + 1] - ai[i]; 4811 if (anzi) { 4812 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4813 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4814 nrows++; 4815 } 4816 } 4817 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4818 k++; 4819 buf_si += len_si[proc]; 4820 } 4821 4822 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4823 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4824 4825 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4826 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); 4827 4828 PetscCall(PetscFree(len_si)); 4829 PetscCall(PetscFree(len_ri)); 4830 PetscCall(PetscFree(rj_waits)); 4831 PetscCall(PetscFree2(si_waits, sj_waits)); 4832 PetscCall(PetscFree(ri_waits)); 4833 PetscCall(PetscFree(buf_s)); 4834 PetscCall(PetscFree(status)); 4835 4836 /* compute a local seq matrix in each processor */ 4837 /*----------------------------------------------*/ 4838 /* allocate bi array and free space for accumulating nonzero column info */ 4839 PetscCall(PetscMalloc1(m + 1, &bi)); 4840 bi[0] = 0; 4841 4842 /* create and initialize a linked list */ 4843 nlnk = N + 1; 4844 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4845 4846 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4847 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4848 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4849 4850 current_space = free_space; 4851 4852 /* determine symbolic info for each local row */ 4853 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4854 4855 for (k = 0; k < merge->nrecv; k++) { 4856 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4857 nrows = *buf_ri_k[k]; 4858 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4859 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4860 } 4861 4862 MatPreallocateBegin(comm, m, n, dnz, onz); 4863 len = 0; 4864 for (i = 0; i < m; i++) { 4865 bnzi = 0; 4866 /* add local non-zero cols of this proc's seqmat into lnk */ 4867 arow = owners[rank] + i; 4868 anzi = ai[arow + 1] - ai[arow]; 4869 aj = a->j + ai[arow]; 4870 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4871 bnzi += nlnk; 4872 /* add received col data into lnk */ 4873 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4874 if (i == *nextrow[k]) { /* i-th row */ 4875 anzi = *(nextai[k] + 1) - *nextai[k]; 4876 aj = buf_rj[k] + 
*nextai[k]; 4877 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4878 bnzi += nlnk; 4879 nextrow[k]++; 4880 nextai[k]++; 4881 } 4882 } 4883 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4884 4885 /* if free space is not available, make more free space */ 4886 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4887 /* copy data into free space, then initialize lnk */ 4888 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4889 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4890 4891 current_space->array += bnzi; 4892 current_space->local_used += bnzi; 4893 current_space->local_remaining -= bnzi; 4894 4895 bi[i + 1] = bi[i] + bnzi; 4896 } 4897 4898 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4899 4900 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4901 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4902 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4903 4904 /* create symbolic parallel matrix B_mpi */ 4905 /*---------------------------------------*/ 4906 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4907 PetscCall(MatCreate(comm, &B_mpi)); 4908 if (n == PETSC_DECIDE) { 4909 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4910 } else { 4911 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4912 } 4913 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4914 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4915 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4916 MatPreallocateEnd(dnz, onz); 4917 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4918 4919 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4920 B_mpi->assembled = PETSC_FALSE; 4921 merge->bi = bi; 4922 merge->bj = bj; 4923 merge->buf_ri = buf_ri; 4924 merge->buf_rj = buf_rj; 4925 merge->coi = NULL; 4926 merge->coj = NULL; 4927 merge->owners_co = 
NULL; 4928 4929 PetscCall(PetscCommDestroy(&comm)); 4930 4931 /* attach the supporting struct to B_mpi for reuse */ 4932 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 4933 PetscCall(PetscContainerSetPointer(container, merge)); 4934 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 4935 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 4936 PetscCall(PetscContainerDestroy(&container)); 4937 *mpimat = B_mpi; 4938 4939 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4940 PetscFunctionReturn(0); 4941 } 4942 4943 /*@C 4944 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 4945 matrices from each processor 4946 4947 Collective 4948 4949 Input Parameters: 4950 + comm - the communicators the parallel matrix will live on 4951 . seqmat - the input sequential matrices 4952 . m - number of local rows (or `PETSC_DECIDE`) 4953 . n - number of local columns (or `PETSC_DECIDE`) 4954 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 4955 4956 Output Parameter: 4957 . mpimat - the parallel matrix generated 4958 4959 Level: advanced 4960 4961 Note: 4962 The dimensions of the sequential matrix in each processor MUST be the same. 4963 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4964 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
4965 @*/ 4966 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) { 4967 PetscMPIInt size; 4968 4969 PetscFunctionBegin; 4970 PetscCallMPI(MPI_Comm_size(comm, &size)); 4971 if (size == 1) { 4972 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 4973 if (scall == MAT_INITIAL_MATRIX) { 4974 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 4975 } else { 4976 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 4977 } 4978 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 4979 PetscFunctionReturn(0); 4980 } 4981 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 4982 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 4983 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 4984 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 4985 PetscFunctionReturn(0); 4986 } 4987 4988 /*@ 4989 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 4990 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 4991 with `MatGetSize()` 4992 4993 Not Collective 4994 4995 Input Parameters: 4996 + A - the matrix 4997 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 4998 4999 Output Parameter: 5000 . A_loc - the local sequential matrix generated 5001 5002 Level: developer 5003 5004 Notes: 5005 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5006 5007 Destroy the matrix with `MatDestroy()` 5008 5009 .seealso: `MatMPIAIJGetLocalMat()` 5010 @*/ 5011 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) { 5012 PetscBool mpi; 5013 5014 PetscFunctionBegin; 5015 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5016 if (mpi) { 5017 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5018 } else { 5019 *A_loc = A; 5020 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5021 } 5022 PetscFunctionReturn(0); 5023 } 5024 5025 /*@ 5026 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5027 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5028 with `MatGetSize()` 5029 5030 Not Collective 5031 5032 Input Parameters: 5033 + A - the matrix 5034 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5035 5036 Output Parameter: 5037 . A_loc - the local sequential matrix generated 5038 5039 Level: developer 5040 5041 Notes: 5042 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5043 5044 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5045 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5046 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5047 modify the values of the returned A_loc. 
5048 5049 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5050 @*/ 5051 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) { 5052 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5053 Mat_SeqAIJ *mat, *a, *b; 5054 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5055 const PetscScalar *aa, *ba, *aav, *bav; 5056 PetscScalar *ca, *cam; 5057 PetscMPIInt size; 5058 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5059 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5060 PetscBool match; 5061 5062 PetscFunctionBegin; 5063 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5064 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5065 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5066 if (size == 1) { 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5069 *A_loc = mpimat->A; 5070 } else if (scall == MAT_REUSE_MATRIX) { 5071 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5072 } 5073 PetscFunctionReturn(0); 5074 } 5075 5076 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5077 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5078 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5079 ai = a->i; 5080 aj = a->j; 5081 bi = b->i; 5082 bj = b->j; 5083 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5084 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5085 aa = aav; 5086 ba = bav; 5087 if (scall == MAT_INITIAL_MATRIX) { 5088 PetscCall(PetscMalloc1(1 + am, &ci)); 5089 ci[0] = 0; 5090 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5091 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5092 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5093 k = 0; 5094 for (i = 0; i < am; i++) { 5095 ncols_o = bi[i + 1] - bi[i]; 5096 ncols_d = ai[i + 1] - ai[i]; 5097 /* off-diagonal portion of A */ 5098 for (jo = 0; jo 
< ncols_o; jo++) { 5099 col = cmap[*bj]; 5100 if (col >= cstart) break; 5101 cj[k] = col; 5102 bj++; 5103 ca[k++] = *ba++; 5104 } 5105 /* diagonal portion of A */ 5106 for (j = 0; j < ncols_d; j++) { 5107 cj[k] = cstart + *aj++; 5108 ca[k++] = *aa++; 5109 } 5110 /* off-diagonal portion of A */ 5111 for (j = jo; j < ncols_o; j++) { 5112 cj[k] = cmap[*bj++]; 5113 ca[k++] = *ba++; 5114 } 5115 } 5116 /* put together the new matrix */ 5117 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5118 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5119 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5120 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5121 mat->free_a = PETSC_TRUE; 5122 mat->free_ij = PETSC_TRUE; 5123 mat->nonew = 0; 5124 } else if (scall == MAT_REUSE_MATRIX) { 5125 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5126 ci = mat->i; 5127 cj = mat->j; 5128 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5129 for (i = 0; i < am; i++) { 5130 /* off-diagonal portion of A */ 5131 ncols_o = bi[i + 1] - bi[i]; 5132 for (jo = 0; jo < ncols_o; jo++) { 5133 col = cmap[*bj]; 5134 if (col >= cstart) break; 5135 *cam++ = *ba++; 5136 bj++; 5137 } 5138 /* diagonal portion of A */ 5139 ncols_d = ai[i + 1] - ai[i]; 5140 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5141 /* off-diagonal portion of A */ 5142 for (j = jo; j < ncols_o; j++) { 5143 *cam++ = *ba++; 5144 bj++; 5145 } 5146 } 5147 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5148 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5149 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5150 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5151 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5152 PetscFunctionReturn(0); 5153 } 5154 5155 /*@ 5156 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a 
sequential matrix with 5157 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5158 5159 Not Collective 5160 5161 Input Parameters: 5162 + A - the matrix 5163 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5164 5165 Output Parameters: 5166 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5167 - A_loc - the local sequential matrix generated 5168 5169 Level: developer 5170 5171 Note: 5172 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5173 5174 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5175 @*/ 5176 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) { 5177 Mat Ao, Ad; 5178 const PetscInt *cmap; 5179 PetscMPIInt size; 5180 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5181 5182 PetscFunctionBegin; 5183 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5184 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5185 if (size == 1) { 5186 if (scall == MAT_INITIAL_MATRIX) { 5187 PetscCall(PetscObjectReference((PetscObject)Ad)); 5188 *A_loc = Ad; 5189 } else if (scall == MAT_REUSE_MATRIX) { 5190 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5191 } 5192 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5193 PetscFunctionReturn(0); 5194 } 5195 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5196 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5197 if (f) { 5198 PetscCall((*f)(A, scall, glob, A_loc)); 5199 } else { 5200 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5201 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5202 Mat_SeqAIJ *c; 5203 
PetscInt *ai = a->i, *aj = a->j; 5204 PetscInt *bi = b->i, *bj = b->j; 5205 PetscInt *ci, *cj; 5206 const PetscScalar *aa, *ba; 5207 PetscScalar *ca; 5208 PetscInt i, j, am, dn, on; 5209 5210 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5211 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5212 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5213 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5214 if (scall == MAT_INITIAL_MATRIX) { 5215 PetscInt k; 5216 PetscCall(PetscMalloc1(1 + am, &ci)); 5217 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5218 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5219 ci[0] = 0; 5220 for (i = 0, k = 0; i < am; i++) { 5221 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5222 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5223 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5224 /* diagonal portion of A */ 5225 for (j = 0; j < ncols_d; j++, k++) { 5226 cj[k] = *aj++; 5227 ca[k] = *aa++; 5228 } 5229 /* off-diagonal portion of A */ 5230 for (j = 0; j < ncols_o; j++, k++) { 5231 cj[k] = dn + *bj++; 5232 ca[k] = *ba++; 5233 } 5234 } 5235 /* put together the new matrix */ 5236 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5237 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5238 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5239 c = (Mat_SeqAIJ *)(*A_loc)->data; 5240 c->free_a = PETSC_TRUE; 5241 c->free_ij = PETSC_TRUE; 5242 c->nonew = 0; 5243 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5244 } else if (scall == MAT_REUSE_MATRIX) { 5245 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5246 for (i = 0; i < am; i++) { 5247 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5248 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5249 /* diagonal portion of A */ 5250 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5251 /* off-diagonal portion of A */ 5252 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5253 } 5254 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5255 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5256 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5257 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5258 if (glob) { 5259 PetscInt cst, *gidx; 5260 5261 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5262 PetscCall(PetscMalloc1(dn + on, &gidx)); 5263 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5264 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5265 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5266 } 5267 } 5268 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5269 PetscFunctionReturn(0); 5270 } 5271 5272 /*@C 5273 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5274 5275 Not Collective 5276 5277 Input Parameters: 5278 + A - the matrix 5279 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5280 - row, col - index sets of rows and columns to extract (or NULL) 5281 5282 Output Parameter: 5283 . 
A_loc - the local sequential matrix generated 5284 5285 Level: developer 5286 5287 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5288 @*/ 5289 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) { 5290 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5291 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5292 IS isrowa, iscola; 5293 Mat *aloc; 5294 PetscBool match; 5295 5296 PetscFunctionBegin; 5297 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5298 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5299 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5300 if (!row) { 5301 start = A->rmap->rstart; 5302 end = A->rmap->rend; 5303 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5304 } else { 5305 isrowa = *row; 5306 } 5307 if (!col) { 5308 start = A->cmap->rstart; 5309 cmap = a->garray; 5310 nzA = a->A->cmap->n; 5311 nzB = a->B->cmap->n; 5312 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5313 ncols = 0; 5314 for (i = 0; i < nzB; i++) { 5315 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5316 else break; 5317 } 5318 imark = i; 5319 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5320 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5321 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5322 } else { 5323 iscola = *col; 5324 } 5325 if (scall != MAT_INITIAL_MATRIX) { 5326 PetscCall(PetscMalloc1(1, &aloc)); 5327 aloc[0] = *A_loc; 5328 } 5329 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5330 if (!col) { /* attach global id of condensed columns */ 5331 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5332 } 5333 *A_loc = aloc[0]; 5334 PetscCall(PetscFree(aloc)); 5335 if (!row) PetscCall(ISDestroy(&isrowa)); 5336 if (!col) PetscCall(ISDestroy(&iscola)); 
5337 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5338 PetscFunctionReturn(0); 5339 } 5340 5341 /* 5342 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5343 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5344 * on a global size. 5345 * */ 5346 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) { 5347 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5348 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5349 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5350 PetscMPIInt owner; 5351 PetscSFNode *iremote, *oiremote; 5352 const PetscInt *lrowindices; 5353 PetscSF sf, osf; 5354 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5355 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5356 MPI_Comm comm; 5357 ISLocalToGlobalMapping mapping; 5358 const PetscScalar *pd_a, *po_a; 5359 5360 PetscFunctionBegin; 5361 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5362 /* plocalsize is the number of roots 5363 * nrows is the number of leaves 5364 * */ 5365 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5366 PetscCall(ISGetLocalSize(rows, &nrows)); 5367 PetscCall(PetscCalloc1(nrows, &iremote)); 5368 PetscCall(ISGetIndices(rows, &lrowindices)); 5369 for (i = 0; i < nrows; i++) { 5370 /* Find a remote index and an owner for a row 5371 * The row could be local or remote 5372 * */ 5373 owner = 0; 5374 lidx = 0; 5375 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5376 iremote[i].index = lidx; 5377 iremote[i].rank = owner; 5378 } 5379 /* Create SF to communicate how many nonzero columns for each row */ 5380 PetscCall(PetscSFCreate(comm, &sf)); 5381 /* SF will figure out the number of nonzero colunms for each row, and their 5382 * offsets 5383 * */ 5384 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, 
PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5385 PetscCall(PetscSFSetFromOptions(sf)); 5386 PetscCall(PetscSFSetUp(sf)); 5387 5388 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5389 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5390 PetscCall(PetscCalloc1(nrows, &pnnz)); 5391 roffsets[0] = 0; 5392 roffsets[1] = 0; 5393 for (i = 0; i < plocalsize; i++) { 5394 /* diag */ 5395 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5396 /* off diag */ 5397 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5398 /* compute offsets so that we relative location for each row */ 5399 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5400 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5401 } 5402 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5403 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5404 /* 'r' means root, and 'l' means leaf */ 5405 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5406 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5407 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5408 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5409 PetscCall(PetscSFDestroy(&sf)); 5410 PetscCall(PetscFree(roffsets)); 5411 PetscCall(PetscFree(nrcols)); 5412 dntotalcols = 0; 5413 ontotalcols = 0; 5414 ncol = 0; 5415 for (i = 0; i < nrows; i++) { 5416 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5417 ncol = PetscMax(pnnz[i], ncol); 5418 /* diag */ 5419 dntotalcols += nlcols[i * 2 + 0]; 5420 /* off diag */ 5421 ontotalcols += nlcols[i * 2 + 1]; 5422 } 5423 /* We do not need to figure the right number of columns 5424 * since all the calculations will be done by going through the raw data 5425 * */ 5426 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5427 PetscCall(MatSetUp(*P_oth)); 5428 PetscCall(PetscFree(pnnz)); 5429 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5430 /* diag */ 5431 
PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5432 /* off diag */ 5433 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5434 /* diag */ 5435 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5436 /* off diag */ 5437 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5438 dntotalcols = 0; 5439 ontotalcols = 0; 5440 ntotalcols = 0; 5441 for (i = 0; i < nrows; i++) { 5442 owner = 0; 5443 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5444 /* Set iremote for diag matrix */ 5445 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5446 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5447 iremote[dntotalcols].rank = owner; 5448 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5449 ilocal[dntotalcols++] = ntotalcols++; 5450 } 5451 /* off diag */ 5452 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5453 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5454 oiremote[ontotalcols].rank = owner; 5455 oilocal[ontotalcols++] = ntotalcols++; 5456 } 5457 } 5458 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5459 PetscCall(PetscFree(loffsets)); 5460 PetscCall(PetscFree(nlcols)); 5461 PetscCall(PetscSFCreate(comm, &sf)); 5462 /* P serves as roots and P_oth is leaves 5463 * Diag matrix 5464 * */ 5465 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5466 PetscCall(PetscSFSetFromOptions(sf)); 5467 PetscCall(PetscSFSetUp(sf)); 5468 5469 PetscCall(PetscSFCreate(comm, &osf)); 5470 /* Off diag */ 5471 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5472 PetscCall(PetscSFSetFromOptions(osf)); 5473 PetscCall(PetscSFSetUp(osf)); 5474 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5475 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5476 /* We operate on the matrix internal data for saving memory */ 5477 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5478 
PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5479 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5480 /* Convert to global indices for diag matrix */ 5481 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5482 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5483 /* We want P_oth store global indices */ 5484 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5485 /* Use memory scalable approach */ 5486 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5487 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5488 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5489 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5490 /* Convert back to local indices */ 5491 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5492 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5493 nout = 0; 5494 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5495 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5496 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5497 /* Exchange values */ 5498 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5499 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5500 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5501 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5502 /* Stop PETSc from shrinking memory */ 5503 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5504 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5505 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5506 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5507 
PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5508 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5509 PetscCall(PetscSFDestroy(&sf)); 5510 PetscCall(PetscSFDestroy(&osf)); 5511 PetscFunctionReturn(0); 5512 } 5513 5514 /* 5515 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5516 * This supports MPIAIJ and MAIJ 5517 * */ 5518 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) { 5519 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5520 Mat_SeqAIJ *p_oth; 5521 IS rows, map; 5522 PetscHMapI hamp; 5523 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5524 MPI_Comm comm; 5525 PetscSF sf, osf; 5526 PetscBool has; 5527 5528 PetscFunctionBegin; 5529 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5530 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5531 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5532 * and then create a submatrix (that often is an overlapping matrix) 5533 * */ 5534 if (reuse == MAT_INITIAL_MATRIX) { 5535 /* Use a hash table to figure out unique keys */ 5536 PetscCall(PetscHMapICreate(&hamp)); 5537 PetscCall(PetscHMapIResize(hamp, a->B->cmap->n)); 5538 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5539 count = 0; 5540 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5541 for (i = 0; i < a->B->cmap->n; i++) { 5542 key = a->garray[i] / dof; 5543 PetscCall(PetscHMapIHas(hamp, key, &has)); 5544 if (!has) { 5545 mapping[i] = count; 5546 PetscCall(PetscHMapISet(hamp, key, count++)); 5547 } else { 5548 /* Current 'i' has the same value the previous step */ 5549 mapping[i] = count - 1; 5550 } 5551 } 5552 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5553 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5554 PetscCheck(htsize == count, comm, 
PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count); 5555 PetscCall(PetscCalloc1(htsize, &rowindices)); 5556 off = 0; 5557 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5558 PetscCall(PetscHMapIDestroy(&hamp)); 5559 PetscCall(PetscSortInt(htsize, rowindices)); 5560 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5561 /* In case, the matrix was already created but users want to recreate the matrix */ 5562 PetscCall(MatDestroy(P_oth)); 5563 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5564 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5565 PetscCall(ISDestroy(&map)); 5566 PetscCall(ISDestroy(&rows)); 5567 } else if (reuse == MAT_REUSE_MATRIX) { 5568 /* If matrix was already created, we simply update values using SF objects 5569 * that as attached to the matrix ealier. 5570 */ 5571 const PetscScalar *pd_a, *po_a; 5572 5573 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5574 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5575 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5576 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5577 /* Update values in place */ 5578 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5579 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5580 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5581 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5582 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5583 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5584 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5585 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5586 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5587 
PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5588 PetscFunctionReturn(0); 5589 } 5590 5591 /*@C 5592 MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A 5593 5594 Collective on A 5595 5596 Input Parameters: 5597 + A - the first matrix in `MATMPIAIJ` format 5598 . B - the second matrix in `MATMPIAIJ` format 5599 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5600 5601 Output Parameters: 5602 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5603 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5604 - B_seq - the sequential matrix generated 5605 5606 Level: developer 5607 5608 @*/ 5609 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) { 5610 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5611 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5612 IS isrowb, iscolb; 5613 Mat *bseq = NULL; 5614 5615 PetscFunctionBegin; 5616 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5617 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5618 } 5619 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5620 5621 if (scall == MAT_INITIAL_MATRIX) { 5622 start = A->cmap->rstart; 5623 cmap = a->garray; 5624 nzA = a->A->cmap->n; 5625 nzB = a->B->cmap->n; 5626 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5627 ncols = 0; 5628 for (i = 0; i < nzB; i++) { /* row < local row index */ 5629 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5630 else break; 5631 } 5632 imark = i; 5633 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5634 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5635 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, 
PETSC_OWN_POINTER, &isrowb)); 5636 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5637 } else { 5638 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5639 isrowb = *rowb; 5640 iscolb = *colb; 5641 PetscCall(PetscMalloc1(1, &bseq)); 5642 bseq[0] = *B_seq; 5643 } 5644 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5645 *B_seq = bseq[0]; 5646 PetscCall(PetscFree(bseq)); 5647 if (!rowb) { 5648 PetscCall(ISDestroy(&isrowb)); 5649 } else { 5650 *rowb = isrowb; 5651 } 5652 if (!colb) { 5653 PetscCall(ISDestroy(&iscolb)); 5654 } else { 5655 *colb = iscolb; 5656 } 5657 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5658 PetscFunctionReturn(0); 5659 } 5660 5661 /* 5662 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5663 of the OFF-DIAGONAL portion of local A 5664 5665 Collective on Mat 5666 5667 Input Parameters: 5668 + A,B - the matrices in mpiaij format 5669 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5670 5671 Output Parameter: 5672 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5673 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5674 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5675 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5676 5677 Developer Note: 5678 This directly accesses information inside the VecScatter associated with the matrix-vector product 5679 for this matrix. This is not desirable.. 
5680 5681 Level: developer 5682 5683 */ 5684 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) { 5685 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5686 Mat_SeqAIJ *b_oth; 5687 VecScatter ctx; 5688 MPI_Comm comm; 5689 const PetscMPIInt *rprocs, *sprocs; 5690 const PetscInt *srow, *rstarts, *sstarts; 5691 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5692 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5693 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5694 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5695 PetscMPIInt size, tag, rank, nreqs; 5696 5697 PetscFunctionBegin; 5698 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5699 PetscCallMPI(MPI_Comm_size(comm, &size)); 5700 5701 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5702 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5703 } 5704 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5705 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5706 5707 if (size == 1) { 5708 startsj_s = NULL; 5709 bufa_ptr = NULL; 5710 *B_oth = NULL; 5711 PetscFunctionReturn(0); 5712 } 5713 5714 ctx = a->Mvctx; 5715 tag = ((PetscObject)ctx)->tag; 5716 5717 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5718 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5719 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5720 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5721 PetscCall(PetscMalloc1(nreqs, &reqs)); 5722 rwaits = reqs; 5723 swaits = reqs + nrecvs; 5724 5725 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5726 if (scall == MAT_INITIAL_MATRIX) { 5727 /* i-array */ 5728 /*---------*/ 5729 /* post receives */ 5730 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5731 for (i = 0; i < nrecvs; i++) { 5732 rowlen = rvalues + rstarts[i] * rbs; 5733 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5734 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5735 } 5736 5737 /* pack the outgoing message */ 5738 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5739 5740 sstartsj[0] = 0; 5741 rstartsj[0] = 0; 5742 len = 0; /* total length of j or a array to be sent */ 5743 if (nsends) { 5744 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5745 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5746 } 5747 for (i = 0; i < nsends; i++) { 5748 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5749 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5750 for (j = 0; j < nrows; j++) { 5751 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5752 for (l = 0; l < sbs; l++) { 5753 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5754 5755 rowlen[j * sbs + l] = ncols; 5756 5757 len += ncols; 5758 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5759 } 5760 k++; 5761 } 5762 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5763 5764 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5765 } 5766 /* recvs and sends of i-array are completed */ 5767 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5768 
PetscCall(PetscFree(svalues)); 5769 5770 /* allocate buffers for sending j and a arrays */ 5771 PetscCall(PetscMalloc1(len + 1, &bufj)); 5772 PetscCall(PetscMalloc1(len + 1, &bufa)); 5773 5774 /* create i-array of B_oth */ 5775 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5776 5777 b_othi[0] = 0; 5778 len = 0; /* total length of j or a array to be received */ 5779 k = 0; 5780 for (i = 0; i < nrecvs; i++) { 5781 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5782 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5783 for (j = 0; j < nrows; j++) { 5784 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5785 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5786 k++; 5787 } 5788 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5789 } 5790 PetscCall(PetscFree(rvalues)); 5791 5792 /* allocate space for j and a arrays of B_oth */ 5793 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5794 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5795 5796 /* j-array */ 5797 /*---------*/ 5798 /* post receives of j-array */ 5799 for (i = 0; i < nrecvs; i++) { 5800 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5801 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5802 } 5803 5804 /* pack the outgoing message j-array */ 5805 if (nsends) k = sstarts[0]; 5806 for (i = 0; i < nsends; i++) { 5807 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5808 bufJ = bufj + sstartsj[i]; 5809 for (j = 0; j < nrows; j++) { 5810 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5811 for (ll = 0; ll < sbs; ll++) { 5812 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5813 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5814 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5815 } 5816 } 5817 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5818 } 
5819 5820 /* recvs and sends of j-array are completed */ 5821 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5822 } else if (scall == MAT_REUSE_MATRIX) { 5823 sstartsj = *startsj_s; 5824 rstartsj = *startsj_r; 5825 bufa = *bufa_ptr; 5826 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5827 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5828 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5829 5830 /* a-array */ 5831 /*---------*/ 5832 /* post receives of a-array */ 5833 for (i = 0; i < nrecvs; i++) { 5834 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5835 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5836 } 5837 5838 /* pack the outgoing message a-array */ 5839 if (nsends) k = sstarts[0]; 5840 for (i = 0; i < nsends; i++) { 5841 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5842 bufA = bufa + sstartsj[i]; 5843 for (j = 0; j < nrows; j++) { 5844 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5845 for (ll = 0; ll < sbs; ll++) { 5846 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5847 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5848 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5849 } 5850 } 5851 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5852 } 5853 /* recvs and sends of a-array are completed */ 5854 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5855 PetscCall(PetscFree(reqs)); 5856 5857 if (scall == MAT_INITIAL_MATRIX) { 5858 /* put together the new matrix */ 5859 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5860 5861 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5862 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5863 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5864 b_oth->free_a = PETSC_TRUE; 5865 b_oth->free_ij = PETSC_TRUE; 5866 b_oth->nonew = 0; 5867 5868 PetscCall(PetscFree(bufj)); 5869 if (!startsj_s || !bufa_ptr) { 5870 PetscCall(PetscFree2(sstartsj, rstartsj)); 5871 PetscCall(PetscFree(bufa_ptr)); 5872 } else { 5873 *startsj_s = sstartsj; 5874 *startsj_r = rstartsj; 5875 *bufa_ptr = bufa; 5876 } 5877 } else if (scall == MAT_REUSE_MATRIX) { 5878 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5879 } 5880 5881 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5882 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5883 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5884 PetscFunctionReturn(0); 5885 } 5886 5887 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5888 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5889 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5890 #if defined(PETSC_HAVE_MKL_SPARSE) 5891 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5892 #endif 5893 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5895 #if defined(PETSC_HAVE_ELEMENTAL) 5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5897 #endif 5898 #if defined(PETSC_HAVE_SCALAPACK) 5899 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5900 #endif 5901 #if defined(PETSC_HAVE_HYPRE) 5902 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5903 #endif 5904 #if defined(PETSC_HAVE_CUDA) 5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat 
*);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  [       ]       [       ]         [                 ]
m [   A   ]  *  n [   B   ]   =   m [        C        ]
  [       ]       [       ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) {
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form C = (B' * A')' : transpose both operands, multiply in swapped order, transpose back into C */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Reuse C's existing structure for the final transpose */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase: sizes and type of C are set here; fill is unused since the numeric
   phase delegates to MatMatMult() on the transposed operands */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C dense if already dense (possibly on device); otherwise inherit A's (dense) type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/*
----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) {
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for C = A*B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

/* Dispatch on the product type; only AB is supported for MPIDense*MPIAIJ */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) {
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(0);
}

/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) {
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the sorted column index ranges of the two sets */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer for the merged row */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6080 6081 Atot: number of entries belonging to the diagonal block 6082 Annz: number of unique nonzeros belonging to the diagonal block. 6083 6084 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6085 6086 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6087 */ 6088 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) { 6089 PetscInt cstart, cend, rstart, rend, row, col; 6090 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6091 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6092 PetscCount k, m, p, q, r, s, mid; 6093 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6094 6095 PetscFunctionBegin; 6096 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6097 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6098 m = rend - rstart; 6099 6100 for (k = 0; k < n; k++) { 6101 if (i[k] >= 0) break; 6102 } /* Skip negative rows */ 6103 6104 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6105 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6106 */ 6107 while (k < n) { 6108 row = i[k]; 6109 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6110 for (s = k; s < n; s++) 6111 if (i[s] != row) break; 6112 for (p = k; p < s; p++) { 6113 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6114 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6115 } 6116 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6117 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6118 rowBegin[row - rstart] = k; 6119 rowMid[row - rstart] = mid; 6120 rowEnd[row - rstart] = s; 6121 6122 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6123 Atot += mid - k; 6124 Btot += s - mid; 6125 6126 /* Count unique nonzeros of this diag/offdiag row */ 6127 for (p = k; p < mid;) { 6128 col = j[p]; 6129 do { 6130 j[p] += PETSC_MAX_INT; 6131 p++; 6132 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6133 Annz++; 6134 } 6135 6136 for (p = mid; p < s;) { 6137 col = j[p]; 6138 do { p++; } while (p < s && j[p] == col); 6139 Bnnz++; 6140 } 6141 k = s; 6142 } 6143 6144 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6145 PetscCall(PetscMalloc1(Atot, &Aperm)); 6146 PetscCall(PetscMalloc1(Btot, &Bperm)); 6147 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6148 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6149 6150 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6151 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6152 for (r = 0; r < m; r++) { 6153 k = rowBegin[r]; 6154 mid = rowMid[r]; 6155 s = rowEnd[r]; 6156 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6157 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6158 Atot += mid - k; 6159 Btot += s - mid; 6160 6161 /* Scan 
column indices in this row and find out how many repeats each unique nonzero has */ 6162 for (p = k; p < mid;) { 6163 col = j[p]; 6164 q = p; 6165 do { p++; } while (p < mid && j[p] == col); 6166 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6167 Annz++; 6168 } 6169 6170 for (p = mid; p < s;) { 6171 col = j[p]; 6172 q = p; 6173 do { p++; } while (p < s && j[p] == col); 6174 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6175 Bnnz++; 6176 } 6177 } 6178 /* Output */ 6179 *Aperm_ = Aperm; 6180 *Annz_ = Annz; 6181 *Atot_ = Atot; 6182 *Ajmap_ = Ajmap; 6183 *Bperm_ = Bperm; 6184 *Bnnz_ = Bnnz; 6185 *Btot_ = Btot; 6186 *Bjmap_ = Bjmap; 6187 PetscFunctionReturn(0); 6188 } 6189 6190 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6191 6192 Input Parameters: 6193 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6194 nnz: number of unique nonzeros in the merged matrix 6195 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6196 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6197 6198 Output Parameter: (memory is allocated by the caller) 6199 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6200 6201 Example: 6202 nnz1 = 4 6203 nnz = 6 6204 imap = [1,3,4,5] 6205 jmap = [0,3,5,6,7] 6206 then, 6207 jmap_new = [0,0,3,3,5,6,7] 6208 */ 6209 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) { 6210 PetscCount k, p; 6211 6212 PetscFunctionBegin; 6213 jmap_new[0] = 0; 6214 p = nnz; /* p loops over jmap_new[] backwards */ 6215 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6216 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6217 } 6218 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6219 PetscFunctionReturn(0); 6220 } 6221 6222 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, 
PetscInt coo_i[], PetscInt coo_j[]) { 6223 MPI_Comm comm; 6224 PetscMPIInt rank, size; 6225 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6226 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6227 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6228 6229 PetscFunctionBegin; 6230 PetscCall(PetscFree(mpiaij->garray)); 6231 PetscCall(VecDestroy(&mpiaij->lvec)); 6232 #if defined(PETSC_USE_CTABLE) 6233 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6234 #else 6235 PetscCall(PetscFree(mpiaij->colmap)); 6236 #endif 6237 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6238 mat->assembled = PETSC_FALSE; 6239 mat->was_assembled = PETSC_FALSE; 6240 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6241 6242 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6243 PetscCallMPI(MPI_Comm_size(comm, &size)); 6244 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6245 PetscCall(PetscLayoutSetUp(mat->rmap)); 6246 PetscCall(PetscLayoutSetUp(mat->cmap)); 6247 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6248 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6249 PetscCall(MatGetLocalSize(mat, &m, &n)); 6250 PetscCall(MatGetSize(mat, &M, &N)); 6251 6252 /* ---------------------------------------------------------------------------*/ 6253 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6254 /* entries come first, then local rows, then remote rows. */ 6255 /* ---------------------------------------------------------------------------*/ 6256 PetscCount n1 = coo_n, *perm1; 6257 PetscInt *i1 = coo_i, *j1 = coo_j; 6258 6259 PetscCall(PetscMalloc1(n1, &perm1)); 6260 for (k = 0; k < n1; k++) perm1[k] = k; 6261 6262 /* Manipulate indices so that entries with negative row or col indices will have smallest 6263 row indices, local entries will have greater but negative row indices, and remote entries 6264 will have positive row indices. 
6265 */ 6266 for (k = 0; k < n1; k++) { 6267 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6268 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6269 else { 6270 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6271 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6272 } 6273 } 6274 6275 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6276 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6277 for (k = 0; k < n1; k++) { 6278 if (i1[k] > PETSC_MIN_INT) break; 6279 } /* Advance k to the first entry we need to take care of */ 6280 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6281 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6282 6283 /* ---------------------------------------------------------------------------*/ 6284 /* Split local rows into diag/offdiag portions */ 6285 /* ---------------------------------------------------------------------------*/ 6286 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6287 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6288 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6289 6290 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6291 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6292 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6293 6294 /* ---------------------------------------------------------------------------*/ 6295 /* Send remote rows to their owner */ 6296 /* 
---------------------------------------------------------------------------*/ 6297 /* Find which rows should be sent to which remote ranks*/ 6298 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6299 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6300 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6301 const PetscInt *ranges; 6302 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6303 6304 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6305 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6306 for (k = rem; k < n1;) { 6307 PetscMPIInt owner; 6308 PetscInt firstRow, lastRow; 6309 6310 /* Locate a row range */ 6311 firstRow = i1[k]; /* first row of this owner */ 6312 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6313 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6314 6315 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6316 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6317 6318 /* All entries in [k,p) belong to this remote owner */ 6319 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6320 PetscMPIInt *sendto2; 6321 PetscInt *nentries2; 6322 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6323 6324 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6325 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6326 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6327 PetscCall(PetscFree2(sendto, nentries2)); 6328 sendto = sendto2; 6329 nentries = nentries2; 6330 maxNsend = maxNsend2; 6331 } 6332 sendto[nsend] = owner; 6333 nentries[nsend] = p - k; 6334 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6335 nsend++; 6336 k = p; 6337 } 6338 6339 /* Build 1st SF to know offsets on remote to send data */ 6340 PetscSF sf1; 6341 PetscInt nroots = 1, nroots2 = 0; 6342 PetscInt nleaves = nsend, nleaves2 = 0; 6343 PetscInt *offsets; 6344 PetscSFNode *iremote; 6345 6346 PetscCall(PetscSFCreate(comm, &sf1)); 6347 PetscCall(PetscMalloc1(nsend, &iremote)); 6348 PetscCall(PetscMalloc1(nsend, &offsets)); 6349 for (k = 0; k < nsend; k++) { 6350 iremote[k].rank = sendto[k]; 6351 iremote[k].index = 0; 6352 nleaves2 += nentries[k]; 6353 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6354 } 6355 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6356 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6357 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6358 PetscCall(PetscSFDestroy(&sf1)); 6359 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6360 6361 /* Build 2nd SF to send remote COOs to their owner */ 6362 PetscSF sf2; 6363 nroots = nroots2; 6364 nleaves = nleaves2; 6365 PetscCall(PetscSFCreate(comm, &sf2)); 6366 
PetscCall(PetscSFSetFromOptions(sf2)); 6367 PetscCall(PetscMalloc1(nleaves, &iremote)); 6368 p = 0; 6369 for (k = 0; k < nsend; k++) { 6370 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6371 for (q = 0; q < nentries[k]; q++, p++) { 6372 iremote[p].rank = sendto[k]; 6373 iremote[p].index = offsets[k] + q; 6374 } 6375 } 6376 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6377 6378 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6379 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6380 6381 /* Send the remote COOs to their owner */ 6382 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6383 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6384 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6385 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6386 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6387 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6388 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6389 6390 PetscCall(PetscFree(offsets)); 6391 PetscCall(PetscFree2(sendto, nentries)); 6392 6393 /* ---------------------------------------------------------------*/ 6394 /* Sort received COOs by row along with the permutation array */ 6395 /* ---------------------------------------------------------------*/ 6396 for (k = 0; k < n2; k++) perm2[k] = k; 6397 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6398 6399 /* ---------------------------------------------------------------*/ 6400 /* 
Split received COOs into diag/offdiag portions */ 6401 /* ---------------------------------------------------------------*/ 6402 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6403 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6404 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6405 6406 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6407 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6408 6409 /* --------------------------------------------------------------------------*/ 6410 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6411 /* --------------------------------------------------------------------------*/ 6412 PetscInt *Ai, *Bi; 6413 PetscInt *Aj, *Bj; 6414 6415 PetscCall(PetscMalloc1(m + 1, &Ai)); 6416 PetscCall(PetscMalloc1(m + 1, &Bi)); 6417 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6418 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6419 6420 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6421 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6422 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6423 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6424 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6425 6426 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6427 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6428 6429 /* --------------------------------------------------------------------------*/ 6430 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6431 /* expect nonzeros in A/B most likely have local contributing entries */ 6432 /* --------------------------------------------------------------------------*/ 6433 PetscInt Annz = Ai[m]; 6434 PetscInt Bnnz = Bi[m]; 6435 
PetscCount *Ajmap1_new, *Bjmap1_new; 6436 6437 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6438 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6439 6440 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6441 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6442 6443 PetscCall(PetscFree(Aimap1)); 6444 PetscCall(PetscFree(Ajmap1)); 6445 PetscCall(PetscFree(Bimap1)); 6446 PetscCall(PetscFree(Bjmap1)); 6447 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6448 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6449 PetscCall(PetscFree(perm1)); 6450 PetscCall(PetscFree3(i2, j2, perm2)); 6451 6452 Ajmap1 = Ajmap1_new; 6453 Bjmap1 = Bjmap1_new; 6454 6455 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6456 if (Annz < Annz1 + Annz2) { 6457 PetscInt *Aj_new; 6458 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6459 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6460 PetscCall(PetscFree(Aj)); 6461 Aj = Aj_new; 6462 } 6463 6464 if (Bnnz < Bnnz1 + Bnnz2) { 6465 PetscInt *Bj_new; 6466 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6467 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6468 PetscCall(PetscFree(Bj)); 6469 Bj = Bj_new; 6470 } 6471 6472 /* --------------------------------------------------------------------------------*/ 6473 /* Create new submatrices for on-process and off-process coupling */ 6474 /* --------------------------------------------------------------------------------*/ 6475 PetscScalar *Aa, *Ba; 6476 MatType rtype; 6477 Mat_SeqAIJ *a, *b; 6478 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6479 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6480 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6481 if (cstart) { 6482 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6483 } 6484 PetscCall(MatDestroy(&mpiaij->A)); 6485 PetscCall(MatDestroy(&mpiaij->B)); 6486 PetscCall(MatGetRootType_Private(mat, &rtype)); 6487 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6488 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6489 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6490 6491 a = (Mat_SeqAIJ *)mpiaij->A->data; 6492 b = (Mat_SeqAIJ *)mpiaij->B->data; 6493 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6494 a->free_a = b->free_a = PETSC_TRUE; 6495 a->free_ij = b->free_ij = PETSC_TRUE; 6496 6497 /* conversion must happen AFTER multiply setup */ 6498 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6499 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6500 PetscCall(VecDestroy(&mpiaij->lvec)); 6501 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6502 6503 mpiaij->coo_n = coo_n; 6504 mpiaij->coo_sf = sf2; 6505 mpiaij->sendlen = nleaves; 6506 mpiaij->recvlen = nroots; 6507 6508 mpiaij->Annz = Annz; 6509 mpiaij->Bnnz = Bnnz; 6510 6511 mpiaij->Annz2 = Annz2; 6512 mpiaij->Bnnz2 = Bnnz2; 6513 6514 mpiaij->Atot1 = Atot1; 6515 mpiaij->Atot2 = Atot2; 6516 mpiaij->Btot1 = Btot1; 6517 mpiaij->Btot2 = Btot2; 6518 6519 mpiaij->Ajmap1 = Ajmap1; 6520 mpiaij->Aperm1 = Aperm1; 6521 6522 mpiaij->Bjmap1 = Bjmap1; 6523 mpiaij->Bperm1 = Bperm1; 6524 6525 mpiaij->Aimap2 = Aimap2; 6526 mpiaij->Ajmap2 = Ajmap2; 6527 mpiaij->Aperm2 = Aperm2; 6528 6529 mpiaij->Bimap2 = Bimap2; 6530 mpiaij->Bjmap2 = Bjmap2; 6531 mpiaij->Bperm2 = Bperm2; 6532 6533 mpiaij->Cperm1 = Cperm1; 6534 6535 /* Allocate in preallocation. 
     If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* MatSetValuesCOO_MPIAIJ - fill in the scalar values v[] at the COO locations registered with
   MatSetPreallocationCOO_MPIAIJ(), which built the Ajmap/Aperm/Bjmap/Bperm/Cperm maps and coo_sf used here.

   imode: with INSERT_VALUES each nonzero is overwritten by the sum of its new contributions;
   otherwise the sum is added to the current value.

   The PetscSF reduce that ships off-process entries to their owners is started first and finished
   only after the purely local accumulation, overlapping communication with computation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) {
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B; /* "diagonal" and "off-diagonal" blocks */
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote; Cperm1[] was recorded at preallocation time */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i]/Bimap2[i] give the nonzero slot in Aa/Ba
     updated by the i-th received unique entry */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* MatCreate_MPIAIJ - type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the
   method table (MatOps_Values), and registers the type-specific entry points and conversions with
   PetscObjectComposeFunction(). The #if-guarded registrations are compiled in only when the
   corresponding external package was configured. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash = PETSC_FALSE;
  b->colmap = NULL;
  b->garray = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices = NULL;
  b->rowvalues = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.
j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6702 . a - matrix values 6703 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6704 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6705 - oa - matrix values 6706 6707 Output Parameter: 6708 . mat - the matrix 6709 6710 Level: advanced 6711 6712 Notes: 6713 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6714 must free the arrays once the matrix has been destroyed and not before. 6715 6716 The i and j indices are 0 based 6717 6718 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6719 6720 This sets local rows and cannot be used to set off-processor values. 6721 6722 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6723 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6724 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6725 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6726 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6727 communication if it is known that only local entries will be set. 
6728 6729 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6730 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6731 @*/ 6732 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) { 6733 Mat_MPIAIJ *maij; 6734 6735 PetscFunctionBegin; 6736 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6737 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6738 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6739 PetscCall(MatCreate(comm, mat)); 6740 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6741 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6742 maij = (Mat_MPIAIJ *)(*mat)->data; 6743 6744 (*mat)->preallocated = PETSC_TRUE; 6745 6746 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6747 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6748 6749 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6750 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6751 6752 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6753 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6754 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6755 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6756 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6757 PetscFunctionReturn(0); 6758 } 6759 6760 typedef struct { 6761 Mat *mp; /* intermediate products */ 6762 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6763 PetscInt cp; /* number of intermediate products */ 6764 6765 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6766 PetscInt *startsj_s, *startsj_r; 6767 PetscScalar *bufa; 6768 Mat P_oth; 6769 6770 /* may take advantage of merging product->B */ 6771 Mat Bloc; /* B-local by merging diag and off-diag */ 6772 6773 /* cusparse does not have support to split between symbolic and numeric phases. 6774 When api_user is true, we don't need to update the numerical values 6775 of the temporary storage */ 6776 PetscBool reusesym; 6777 6778 /* support for COO values insertion */ 6779 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6780 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6781 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6782 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6783 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6784 PetscMemType mtype; 6785 6786 /* customization */ 6787 PetscBool abmerge; 6788 PetscBool P_oth_bind; 6789 } MatMatMPIAIJBACKEND; 6790 6791 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) { 6792 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6793 PetscInt i; 6794 6795 PetscFunctionBegin; 6796 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6797 PetscCall(PetscFree(mmdata->bufa)); 6798 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6799 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6800 PetscCall(MatDestroy(&mmdata->P_oth)); 6801 PetscCall(MatDestroy(&mmdata->Bloc)); 6802 PetscCall(PetscSFDestroy(&mmdata->sf)); 6803 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6804 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6805 PetscCall(PetscFree(mmdata->own[0])); 6806 PetscCall(PetscFree(mmdata->own)); 6807 
PetscCall(PetscFree(mmdata->off[0])); 6808 PetscCall(PetscFree(mmdata->off)); 6809 PetscCall(PetscFree(mmdata)); 6810 PetscFunctionReturn(0); 6811 } 6812 6813 /* Copy selected n entries with indices in idx[] of A to v[]. 6814 If idx is NULL, copy the whole data array of A to v[] 6815 */ 6816 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) { 6817 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 6818 6819 PetscFunctionBegin; 6820 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 6821 if (f) { 6822 PetscCall((*f)(A, n, idx, v)); 6823 } else { 6824 const PetscScalar *vv; 6825 6826 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 6827 if (n && idx) { 6828 PetscScalar *w = v; 6829 const PetscInt *oi = idx; 6830 PetscInt j; 6831 6832 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6833 } else { 6834 PetscCall(PetscArraycpy(v, vv, n)); 6835 } 6836 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 6837 } 6838 PetscFunctionReturn(0); 6839 } 6840 6841 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) { 6842 MatMatMPIAIJBACKEND *mmdata; 6843 PetscInt i, n_d, n_o; 6844 6845 PetscFunctionBegin; 6846 MatCheckProduct(C, 1); 6847 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 6848 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 6849 if (!mmdata->reusesym) { /* update temporary matrices */ 6850 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 6851 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 6852 } 6853 mmdata->reusesym = PETSC_FALSE; 6854 6855 for (i = 0; i < mmdata->cp; i++) { 6856 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", 
MatProductTypes[mmdata->mp[i]->product->type]); 6857 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6858 } 6859 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6860 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 6861 6862 if (mmdata->mptmp[i]) continue; 6863 if (noff) { 6864 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 6865 6866 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 6867 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 6868 n_o += noff; 6869 n_d += nown; 6870 } else { 6871 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 6872 6873 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 6874 n_d += mm->nz; 6875 } 6876 } 6877 if (mmdata->hasoffproc) { /* offprocess insertion */ 6878 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 6879 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 6880 } 6881 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 6882 PetscFunctionReturn(0); 6883 } 6884 6885 /* Support for Pt * A, A * P, or Pt * A * P */ 6886 #define MAX_NUMBER_INTERMEDIATE 4 6887 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) { 6888 Mat_Product *product = C->product; 6889 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6890 Mat_MPIAIJ *a, *p; 6891 MatMatMPIAIJBACKEND *mmdata; 6892 ISLocalToGlobalMapping P_oth_l2g = NULL; 6893 IS glob = NULL; 6894 const char *prefix; 6895 char pprefix[256]; 6896 const PetscInt *globidx, *P_oth_idx; 6897 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 6898 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 6899 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6900 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6901 /* a base offset; type-2: sparse with a local to global map table */ 6902 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6903 6904 MatProductType ptype; 6905 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk; 6906 PetscMPIInt size; 6907 6908 PetscFunctionBegin; 6909 MatCheckProduct(C, 1); 6910 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 6911 ptype = product->type; 6912 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 6913 ptype = MATPRODUCT_AB; 6914 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6915 } 6916 switch (ptype) { 6917 case MATPRODUCT_AB: 6918 A = product->A; 6919 P = product->B; 6920 m = A->rmap->n; 6921 n = P->cmap->n; 6922 M = A->rmap->N; 6923 N = P->cmap->N; 6924 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6925 break; 6926 case MATPRODUCT_AtB: 6927 P = product->A; 6928 A = product->B; 6929 m = P->cmap->n; 6930 n = A->cmap->n; 6931 M = P->cmap->N; 6932 N = A->cmap->N; 6933 hasoffproc = PETSC_TRUE; 6934 break; 6935 case MATPRODUCT_PtAP: 6936 A = product->A; 6937 P = product->B; 6938 m = P->cmap->n; 6939 n = P->cmap->n; 6940 M = P->cmap->N; 6941 N = P->cmap->N; 6942 hasoffproc = PETSC_TRUE; 6943 break; 6944 default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 6945 } 6946 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 6947 if (size == 1) hasoffproc = PETSC_FALSE; 6948 6949 /* defaults */ 6950 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 6951 mp[i] = NULL; 6952 mptmp[i] = PETSC_FALSE; 6953 rmapt[i] = -1; 6954 cmapt[i] = -1; 6955 rmapa[i] = NULL; 6956 cmapa[i] = NULL; 6957 } 6958 6959 /* customization */ 6960 
PetscCall(PetscNew(&mmdata)); 6961 mmdata->reusesym = product->api_user; 6962 if (ptype == MATPRODUCT_AB) { 6963 if (product->api_user) { 6964 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 6965 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 6966 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6967 PetscOptionsEnd(); 6968 } else { 6969 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 6970 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 6971 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6972 PetscOptionsEnd(); 6973 } 6974 } else if (ptype == MATPRODUCT_PtAP) { 6975 if (product->api_user) { 6976 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 6977 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6978 PetscOptionsEnd(); 6979 } else { 6980 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 6981 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6982 PetscOptionsEnd(); 6983 } 6984 } 6985 a = (Mat_MPIAIJ *)A->data; 6986 p = (Mat_MPIAIJ *)P->data; 6987 PetscCall(MatSetSizes(C, m, n, M, N)); 6988 PetscCall(PetscLayoutSetUp(C->rmap)); 6989 PetscCall(PetscLayoutSetUp(C->cmap)); 6990 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6991 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 6992 6993 cp = 0; 6994 switch (ptype) { 6995 case MATPRODUCT_AB: /* A * P */ 6996 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 6997 6998 /* A_diag * P_local (merged or not) */ 6999 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7000 /* P is product->B */ 7001 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7002 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7003 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7004 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7005 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7006 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7007 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7008 mp[cp]->product->api_user = product->api_user; 7009 PetscCall(MatProductSetFromOptions(mp[cp])); 7010 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7011 PetscCall(ISGetIndices(glob, &globidx)); 7012 rmapt[cp] = 1; 7013 cmapt[cp] = 2; 7014 cmapa[cp] = globidx; 7015 mptmp[cp] = PETSC_FALSE; 7016 cp++; 7017 } else { /* A_diag * P_diag and A_diag * P_off */ 7018 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7019 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7020 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7021 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7022 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7023 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7024 mp[cp]->product->api_user = product->api_user; 7025 PetscCall(MatProductSetFromOptions(mp[cp])); 7026 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7027 rmapt[cp] = 1; 7028 cmapt[cp] = 1; 7029 mptmp[cp] = PETSC_FALSE; 7030 cp++; 7031 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7032 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7033 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7034 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7035 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7036 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7037 mp[cp]->product->api_user = product->api_user; 7038 PetscCall(MatProductSetFromOptions(mp[cp])); 7039 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7040 rmapt[cp] = 1; 7041 cmapt[cp] = 2; 7042 cmapa[cp] = p->garray; 7043 mptmp[cp] = PETSC_FALSE; 7044 cp++; 7045 } 7046 7047 /* A_off * P_other */ 7048 if (mmdata->P_oth) { 7049 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7050 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7051 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7052 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7053 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7054 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7055 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7056 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7057 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7058 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7059 mp[cp]->product->api_user = product->api_user; 7060 PetscCall(MatProductSetFromOptions(mp[cp])); 7061 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7062 rmapt[cp] = 1; 7063 cmapt[cp] = 2; 7064 cmapa[cp] = P_oth_idx; 7065 mptmp[cp] = PETSC_FALSE; 7066 cp++; 7067 } 7068 break; 7069 7070 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7071 /* A is product->B */ 7072 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7073 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7074 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7075 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7076 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7077 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7078 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7079 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7080 mp[cp]->product->api_user = product->api_user; 7081 PetscCall(MatProductSetFromOptions(mp[cp])); 7082 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7083 PetscCall(ISGetIndices(glob, &globidx)); 7084 rmapt[cp] = 2; 7085 rmapa[cp] = globidx; 7086 cmapt[cp] = 2; 7087 cmapa[cp] = globidx; 7088 mptmp[cp] = PETSC_FALSE; 7089 cp++; 7090 } else { 7091 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7092 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7093 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7094 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7095 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7096 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7097 mp[cp]->product->api_user = product->api_user; 7098 PetscCall(MatProductSetFromOptions(mp[cp])); 7099 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7100 PetscCall(ISGetIndices(glob, &globidx)); 7101 rmapt[cp] = 1; 7102 cmapt[cp] = 2; 7103 cmapa[cp] = globidx; 7104 mptmp[cp] = PETSC_FALSE; 7105 cp++; 7106 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7107 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7108 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7109 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7110 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7111 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7112 mp[cp]->product->api_user = product->api_user; 7113 PetscCall(MatProductSetFromOptions(mp[cp])); 7114 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7115 rmapt[cp] = 2; 7116 rmapa[cp] = p->garray; 
7117 cmapt[cp] = 2; 7118 cmapa[cp] = globidx; 7119 mptmp[cp] = PETSC_FALSE; 7120 cp++; 7121 } 7122 break; 7123 case MATPRODUCT_PtAP: 7124 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7125 /* P is product->B */ 7126 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7127 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7128 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7129 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7130 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7131 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7132 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7133 mp[cp]->product->api_user = product->api_user; 7134 PetscCall(MatProductSetFromOptions(mp[cp])); 7135 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7136 PetscCall(ISGetIndices(glob, &globidx)); 7137 rmapt[cp] = 2; 7138 rmapa[cp] = globidx; 7139 cmapt[cp] = 2; 7140 cmapa[cp] = globidx; 7141 mptmp[cp] = PETSC_FALSE; 7142 cp++; 7143 if (mmdata->P_oth) { 7144 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7145 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7146 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7147 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7148 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7149 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7150 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7151 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7152 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7153 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7154 mp[cp]->product->api_user = product->api_user; 7155 PetscCall(MatProductSetFromOptions(mp[cp])); 7156 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7157 
mptmp[cp] = PETSC_TRUE; 7158 cp++; 7159 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7160 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7161 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7162 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7163 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7164 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7165 mp[cp]->product->api_user = product->api_user; 7166 PetscCall(MatProductSetFromOptions(mp[cp])); 7167 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7168 rmapt[cp] = 2; 7169 rmapa[cp] = globidx; 7170 cmapt[cp] = 2; 7171 cmapa[cp] = P_oth_idx; 7172 mptmp[cp] = PETSC_FALSE; 7173 cp++; 7174 } 7175 break; 7176 default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7177 } 7178 /* sanity check */ 7179 if (size > 1) 7180 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7181 7182 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7183 for (i = 0; i < cp; i++) { 7184 mmdata->mp[i] = mp[i]; 7185 mmdata->mptmp[i] = mptmp[i]; 7186 } 7187 mmdata->cp = cp; 7188 C->product->data = mmdata; 7189 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7190 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7191 7192 /* memory type */ 7193 mmdata->mtype = PETSC_MEMTYPE_HOST; 7194 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7195 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7196 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7197 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7198 7199 /* prepare coo coordinates for values insertion */ 7200 7201 /* count total nonzeros of those intermediate seqaij Mats 7202 ncoo_d: # of nonzeros of matrices that do 
not have offproc entries 7203 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7204 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7205 */ 7206 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7207 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7208 if (mptmp[cp]) continue; 7209 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7210 const PetscInt *rmap = rmapa[cp]; 7211 const PetscInt mr = mp[cp]->rmap->n; 7212 const PetscInt rs = C->rmap->rstart; 7213 const PetscInt re = C->rmap->rend; 7214 const PetscInt *ii = mm->i; 7215 for (i = 0; i < mr; i++) { 7216 const PetscInt gr = rmap[i]; 7217 const PetscInt nz = ii[i + 1] - ii[i]; 7218 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7219 else ncoo_oown += nz; /* this row is local */ 7220 } 7221 } else ncoo_d += mm->nz; 7222 } 7223 7224 /* 7225 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7226 7227 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7228 7229 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7230 7231 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7232 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7233 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7234 7235 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7236 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7237 */ 7238 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7239 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7240 7241 /* gather (i,j) of nonzeros inserted by remote procs */ 7242 if (hasoffproc) { 7243 PetscSF msf; 7244 PetscInt ncoo2, *coo_i2, *coo_j2; 7245 7246 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7247 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7248 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7249 7250 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7251 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7252 PetscInt *idxoff = mmdata->off[cp]; 7253 PetscInt *idxown = mmdata->own[cp]; 7254 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7255 const PetscInt *rmap = rmapa[cp]; 7256 const PetscInt *cmap = cmapa[cp]; 7257 const PetscInt *ii = mm->i; 7258 PetscInt *coi = coo_i + ncoo_o; 7259 PetscInt *coj = coo_j + ncoo_o; 7260 const PetscInt mr = mp[cp]->rmap->n; 7261 const PetscInt rs = C->rmap->rstart; 7262 const PetscInt re = C->rmap->rend; 7263 const PetscInt cs = C->cmap->rstart; 7264 for (i = 0; i < mr; i++) { 7265 const PetscInt *jj = mm->j + ii[i]; 7266 const PetscInt gr = rmap[i]; 7267 const PetscInt nz = ii[i + 1] - ii[i]; 7268 if (gr < rs || gr >= re) { /* this is an offproc row */ 7269 for (j = ii[i]; j < ii[i + 1]; j++) { 7270 *coi++ = gr; 7271 *idxoff++ = j; 7272 } 7273 if (!cmapt[cp]) { /* already global */ 7274 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7275 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7276 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7277 } else { /* offdiag */ 7278 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7279 } 7280 ncoo_o += nz; 7281 } else { /* this is a local row */ 7282 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7283 } 7284 } 7285 } 7286 mmdata->off[cp + 1] = idxoff; 7287 mmdata->own[cp + 1] = idxown; 7288 } 7289 7290 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7291 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7292 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7293 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7294 ncoo = ncoo_d + ncoo_oown + ncoo2; 7295 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7296 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7297 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7298 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7299 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7300 PetscCall(PetscFree2(coo_i, coo_j)); 7301 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7302 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7303 coo_i = coo_i2; 7304 coo_j = coo_j2; 7305 } else { /* no offproc values insertion */ 7306 ncoo = ncoo_d; 7307 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7308 7309 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7310 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7311 PetscCall(PetscSFSetUp(mmdata->sf)); 7312 } 7313 mmdata->hasoffproc = hasoffproc; 7314 7315 /* gather (i,j) of nonzeros inserted locally */ 7316 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7317 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7318 PetscInt *coi = coo_i + ncoo_d; 7319 PetscInt *coj = coo_j + ncoo_d; 7320 const PetscInt *jj = mm->j; 7321 const PetscInt *ii = mm->i; 7322 const PetscInt *cmap = cmapa[cp]; 7323 const PetscInt *rmap = rmapa[cp]; 7324 const PetscInt mr = mp[cp]->rmap->n; 7325 const PetscInt rs = C->rmap->rstart; 7326 const 
PetscInt re = C->rmap->rend; 7327 const PetscInt cs = C->cmap->rstart; 7328 7329 if (mptmp[cp]) continue; 7330 if (rmapt[cp] == 1) { /* consecutive rows */ 7331 /* fill coo_i */ 7332 for (i = 0; i < mr; i++) { 7333 const PetscInt gr = i + rs; 7334 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7335 } 7336 /* fill coo_j */ 7337 if (!cmapt[cp]) { /* type-0, already global */ 7338 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7339 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7340 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7341 } else { /* type-2, local to global for sparse columns */ 7342 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7343 } 7344 ncoo_d += mm->nz; 7345 } else if (rmapt[cp] == 2) { /* sparse rows */ 7346 for (i = 0; i < mr; i++) { 7347 const PetscInt *jj = mm->j + ii[i]; 7348 const PetscInt gr = rmap[i]; 7349 const PetscInt nz = ii[i + 1] - ii[i]; 7350 if (gr >= rs && gr < re) { /* local rows */ 7351 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7352 if (!cmapt[cp]) { /* type-0, already global */ 7353 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7354 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7355 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7356 } else { /* type-2, local to global for sparse columns */ 7357 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7358 } 7359 ncoo_d += nz; 7360 } 7361 } 7362 } 7363 } 7364 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7365 PetscCall(ISDestroy(&glob)); 7366 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7367 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7368 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7369 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7370 7371 /* preallocate with COO data */ 7372 PetscCall(MatSetPreallocationCOO(C, ncoo, 
coo_i, coo_j)); /* completes MatSetPreallocationCOO(C, ncoo, coo_i, coo_j) started above: preallocate C from the assembled COO pattern */
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}

/*
   MatProductSetFromOptions_MPIAIJBACKEND - choose the backend symbolic product for C = A*B, A^t*B or P^t*A*P on MPIAIJ-based matrices.

   Selects MatProductSymbolic_MPIAIJBACKEND for the AB, AtB and PtAP product types, unless either
   (a) on device builds, A and B have different types or one of them is bound to the CPU, or
   (b) the user forced the CPU path through the *_backend_cpu options queried below.
   If no backend symbolic routine is installed, falls back to MatProductSetFromOptions_MPIAIJ().
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) {
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* device path must be explicitly validated below */
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE; /* no device support compiled in: backend path is the plain CPU path */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* backend path requires A and B to have the same (device) type and neither bound to the CPU */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user called the old API (MatMatMult etc.) or the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default: break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP: mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; break;
    default: break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)
*/
/* Collapses the scalar column indices of one row to block (size bs) column indices, removing duplicates.
   Note: the duplicate check only compares against the most recently emitted block index, so it relies on
   MatGetRow() returning the column indices in ascending order. An empty row yields *n = 0. */
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) {
  PetscInt        cnt = -1, nidx, j; /* cnt stays -1 for an empty row so *n = cnt + 1 = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; /* new block only when strictly larger than the last one emitted */
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(0);
}

/*
   Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

   ncollapsed - the number of
block indices
   collapsed - the block indices (must be large enough to contain the indices)
*/
/* Collapses rows [start, start+bs) to a single sorted, duplicate-free set of block column indices by
   repeatedly merging each row's collapsed indices into the running set with PetscMergeIntArray().
   w0, w1, w2 are caller-provided workspaces; after the swap loop the result may live in w0 or w2, so
   *collapsed aliases one of the workspaces and must not be freed separately. */
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) {
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap the "previous result" and "merge target" buffers for the next iteration */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameters:
+  Amat - matrix
.  symmetrize - make the result symmetric
-  scale - scale with diagonal

   Output Parameter:
.
a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) {
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c; /* a = diagonal block, b = off-diagonal block (NULL for seq), c = loop cursor over {a,b} */
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows = rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* blocked case: build a nloc x nloc scalar graph where entry (I,J) accumulates |a_ij| over the bs x bs block */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* fast path: requires every block to be "dense" (each of the bs rows has the same block-column
         structure); falls through to old_bs below as soon as that assumption is violated */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, AA[4096]; /* fixed stack buffers: one slot per block column of a block row; guarded by the nmax < 4096 check below */
      PetscInt  *aj, *ai, AJ[4096], nc;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0; /* row length not a multiple of bs -> blocks cannot be dense */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* every row in the block must start at the same first column */
            if (nnz[brow / bs] != jj / bs) ok = 0;
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs; /* assumption violated: use the general (slower) path in the else branch */
          }
        }
        PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
      }
      /* both calls issued; only the one matching Gmat's type takes effect */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; /* raw CSR access into the diagonal block */
        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) { // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k; /* same block-column offset in every row of the block (dense-block assumption) */
            for (int jj = 0; jj < bs; jj++) { // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray; /* garray maps off-diag local columns to global columns */
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    } else {
      /* general blocked path: collapse rows one scalar entry at a time with ADD_VALUES */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
         Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
           Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
           Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
           Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; /* cannot exceed the number of off-process block columns */
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); /* ADD_VALUES accumulates the block "norm" */
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
       procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a = d->A;
      b = d->B;
    }
    /* abs */
    for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
      MatInfo      info;
      PetscScalar *avals;
      PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
      PetscCall(MatSeqAIJGetArray(c, &avals));
      for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(c, &avals));
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) { /* symmetrize via G <- G + G^T unless symmetry is already known */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag)); /* symmetric scaling: G <- D^{-1/2} G D^{-1/2} */
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* MatFilter_AIJ - build a copy of the scalar graph Gmat keeping only the entries whose real-part
   magnitude is strictly greater than vfilter; the surviving entries retain their original values. */
PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) {
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; /* nnz0 = entries scanned, nnz1 = entries kept */
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c; /* a = diagonal block, b = off-diagonal block (NULL for seq), c = loop cursor */
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this
     operation? It can be very expensive on large matrices.
  */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a      = d->A;
    b      = d->B;
    garray = d->garray; /* maps off-diagonal local column ids to global columns */
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols; /* maxcols sizes the per-row scratch buffers AA/AJ below */
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  /* both preallocation calls issued; only the one matching tGmat's type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); /* every insertion below targets a locally owned row */
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0;
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) { /* first the diagonal block, then (if present) the off-diagonal block */
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) { /* keep only entries strictly above the threshold */
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag
          if (c != a) cid = garray[idx[jj]]; /* off-diagonal block: translate to global column via garray */
          AA[ncol_row] = vals[jj];           /* keep the original value, not its absolute value */
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(0);
}

/*
   Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...)
\ 7846 do { \ 7847 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 7848 return; \ 7849 } while (0) 7850 7851 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7852 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7853 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7854 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7855 #else 7856 #endif 7857 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) { 7858 Mat mat = *mmat; 7859 PetscInt m = *mm, n = *mn; 7860 InsertMode addv = *maddv; 7861 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 7862 PetscScalar value; 7863 7864 MatCheckPreallocated(mat, 1); 7865 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7866 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 7867 { 7868 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 7869 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 7870 PetscBool roworiented = aij->roworiented; 7871 7872 /* Some Variables required in the macro */ 7873 Mat A = aij->A; 7874 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 7875 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 7876 MatScalar *aa; 7877 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7878 Mat B = aij->B; 7879 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 7880 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 7881 MatScalar *ba; 7882 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7883 * cannot use "#if defined" inside a macro. 
*/ 7884 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7885 7886 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 7887 PetscInt nonew = a->nonew; 7888 MatScalar *ap1, *ap2; 7889 7890 PetscFunctionBegin; 7891 PetscCall(MatSeqAIJGetArray(A, &aa)); 7892 PetscCall(MatSeqAIJGetArray(B, &ba)); 7893 for (i = 0; i < m; i++) { 7894 if (im[i] < 0) continue; 7895 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 7896 if (im[i] >= rstart && im[i] < rend) { 7897 row = im[i] - rstart; 7898 lastcol1 = -1; 7899 rp1 = aj + ai[row]; 7900 ap1 = aa + ai[row]; 7901 rmax1 = aimax[row]; 7902 nrow1 = ailen[row]; 7903 low1 = 0; 7904 high1 = nrow1; 7905 lastcol2 = -1; 7906 rp2 = bj + bi[row]; 7907 ap2 = ba + bi[row]; 7908 rmax2 = bimax[row]; 7909 nrow2 = bilen[row]; 7910 low2 = 0; 7911 high2 = nrow2; 7912 7913 for (j = 0; j < n; j++) { 7914 if (roworiented) value = v[i * n + j]; 7915 else value = v[i + j * m]; 7916 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7917 if (in[j] >= cstart && in[j] < cend) { 7918 col = in[j] - cstart; 7919 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 7920 } else if (in[j] < 0) continue; 7921 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7922 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7923 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 7924 } else { 7925 if (mat->was_assembled) { 7926 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7927 #if defined(PETSC_USE_CTABLE) 7928 PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); 7929 col--; 7930 #else 7931 col = aij->colmap[in[j]] - 1; 7932 #endif 7933 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) 
{ 7934 PetscCall(MatDisAssemble_MPIAIJ(mat)); 7935 col = in[j]; 7936 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7937 B = aij->B; 7938 b = (Mat_SeqAIJ *)B->data; 7939 bimax = b->imax; 7940 bi = b->i; 7941 bilen = b->ilen; 7942 bj = b->j; 7943 rp2 = bj + bi[row]; 7944 ap2 = ba + bi[row]; 7945 rmax2 = bimax[row]; 7946 nrow2 = bilen[row]; 7947 low2 = 0; 7948 high2 = nrow2; 7949 bm = aij->B->rmap->n; 7950 ba = b->a; 7951 inserted = PETSC_FALSE; 7952 } 7953 } else col = in[j]; 7954 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 7955 } 7956 } 7957 } else if (!aij->donotstash) { 7958 if (roworiented) { 7959 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7960 } else { 7961 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7962 } 7963 } 7964 } 7965 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 7966 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 7967 } 7968 PetscFunctionReturnVoid(); 7969 } 7970 7971 /* Undefining these here since they were redefined from their original definition above! No 7972 * other PETSc functions should be defined past this point, as it is impossible to recover the 7973 * original definitions */ 7974 #undef PetscCall 7975 #undef SETERRQ 7976