#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
   Returns the CSR index arrays of a merged (diagonal + off-diagonal) sequential copy of the
   parallel matrix. The copy is composed on A under the key "MatGetRowIJ_MPIAIJ" so the matching
   MatRestoreRowIJ_MPIAIJ() can retrieve it; the MatDestroy() below only drops this function's
   reference, the composition keeps the copy alive until the restore.
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*
   Releases the CSR arrays obtained with MatGetRowIJ_MPIAIJ(). Clearing the composition drops
   the last reference to the sequential copy, destroying it.
*/
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Propagates the CPU/GPU binding flag to the two sequential blocks and the work vectors. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(0);
}

/* Sets row/column block sizes on the diagonal block; the off-diagonal block B keeps a
   column block size of 1 (its columns are the scattered ghost columns). */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(0);
}

/*
   Builds an IS of the locally owned global row indices that contain at least one numerically
   nonzero entry (in either block). If NO process has an entirely-zero row, *keptrows is left
   NULL, meaning "all rows kept"; the Allreduce makes that decision collective.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of local rows that are entirely (numerically) zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) { /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero found; row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* all stored values were zero */
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) { /* no zero rows anywhere: leave *keptrows NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: record the global indices of the kept (nonzero) rows */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}

/* Fast path: when the layouts are congruent the diagonal lives entirely in the A block. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(0);
}

/* Collects local rows whose diagonal entry is (numerically) zero and returns them
   with global numbering in *zrows. The IS takes ownership of the index array. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* local -> global row numbers */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(0);
}

/*
   Computes a per-(global)column reduction (norms, sums or means) over the whole parallel
   matrix. Each rank accumulates its contributions in a length-n work array indexed by global
   column (diagonal block offset by cmap->rstart, off-diagonal mapped through garray), then an
   Allreduce combines them; reductions[] must have length n on every rank.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): paired get/restore with no use in between -- presumably forces the value
     arrays onto the host before a_aij->a / b_aij->a are read directly below; confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; note PetscAbsScalar(a*a) == |a|^2 also for complex */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the number of global rows */
  }
  PetscFunctionReturn(0);
}

/*
   Returns an IS (global row numbering) of local rows that have an entry outside the block
   diagonal: union of the diagonal block's own off-block-diagonal rows and the rows with any
   off-diagonal (B block) nonzero, sorted with duplicates removed.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* merge the two (local-numbered) index lists, then sort and deduplicate */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local -> global row numbers */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table: global column + 1 -> local column + 1 (shift by one since 0 is the "absent" value) */
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES));
#else
  /* dense array of length N (global columns); entry 0 means "column not present locally" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N + 1) * sizeof(PetscInt)));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts (or adds) one value into the diagonal (A) block of an MPIAIJ matrix.
   Expects the caller (MatSetValues_MPIAIJ) to have set up the per-row aliases
   rp1/ap1 (column/value arrays of the row), nrow1/rmax1 (used/allocated length),
   low1/high1/lastcol1 (search window state kept across calls for sorted input),
   plus aa/ai/aj/aimax/ailen, nonew and ignorezeroentries. A binary search narrows
   the window, then a linear scan finds the slot; a missing entry triggers
   MatSeqXAIJReallocateAIJ and an in-row shift. (orow, ocol) are the global
   indices, used only for error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal (B) block, using the
   rp2/ap2/nrow2/rmax2/low2/high2/lastcol2 and ba/bi/bj/bimax/bilen aliases. Note it drops
   zero values under ignorezeroentries unconditionally (no row != col exception: B holds no
   diagonal entries).
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

/*
   Overwrites the values of one locally owned row (global index `row`) from the packed array v,
   which must hold the row's entries in global column order. The row is split into the B entries
   left of the diagonal block, the A entries, then the remaining B entries; the existing nonzero
   pattern is reused, no entries are inserted.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* global -> local row index */
  /* l = number of B entries with global column < the start of the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}

/*
   MatSetValues() implementation for MPIAIJ: locally owned rows are routed to the diagonal (A)
   or off-diagonal (B) block via the two insertion macros above; off-process rows are stashed
   for communication during assembly. If a previously assembled matrix receives a column new to
   B, the matrix is disassembled (B switches back to global column indices) and the B aliases
   are re-seated, since MatDisAssemble_MPIAIJ() replaces aij->B.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
      row      = im[i] - rstart;
      /* per-row state consumed/updated by the insertion macros */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash for MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write position / per-row count in the diagonal block */
  PetscInt    offd_so_far = 0, onz; /* running write position / per-row count in the off-diagonal block */

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* shifted to local column numbering */
        dnz++;
      } else { /* off-diagonal entries: keep global column numbering for now */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
573 */ 574 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) { 575 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 576 Mat A = aij->A; /* diagonal part of the matrix */ 577 Mat B = aij->B; /* offdiagonal part of the matrix */ 578 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 579 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 580 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 581 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 582 PetscInt *ailen = a->ilen, *aj = a->j; 583 PetscInt *bilen = b->ilen, *bj = b->j; 584 PetscInt am = aij->A->rmap->n, j; 585 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 586 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 587 PetscScalar *aa = a->a, *ba = b->a; 588 589 PetscFunctionBegin; 590 /* Iterate over all rows of the matrix */ 591 for (j = 0; j < am; j++) { 592 dnz_row = onz_row = 0; 593 rowstart_offd = full_offd_i[j]; 594 rowstart_diag = full_diag_i[j]; 595 /* Iterate over all non-zero columns of the current row */ 596 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 597 /* If column is in the diagonal */ 598 if (mat_j[col] >= cstart && mat_j[col] < cend) { 599 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 600 aa[rowstart_diag + dnz_row] = mat_a[col]; 601 dnz_row++; 602 } else { /* off-diagonal entries */ 603 bj[rowstart_offd + onz_row] = mat_j[col]; 604 ba[rowstart_offd + onz_row] = mat_a[col]; 605 onz_row++; 606 } 607 } 608 ailen[j] = dnz_row; 609 bilen[j] = onz_row; 610 } 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) { 615 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 616 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 617 PetscInt cstart = 
mat->cmap->rstart, cend = mat->cmap->rend, row, col; 618 619 PetscFunctionBegin; 620 for (i = 0; i < m; i++) { 621 if (idxm[i] < 0) continue; /* negative row */ 622 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 623 if (idxm[i] >= rstart && idxm[i] < rend) { 624 row = idxm[i] - rstart; 625 for (j = 0; j < n; j++) { 626 if (idxn[j] < 0) continue; /* negative column */ 627 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 628 if (idxn[j] >= cstart && idxn[j] < cend) { 629 col = idxn[j] - cstart; 630 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 631 } else { 632 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 633 #if defined(PETSC_USE_CTABLE) 634 PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col)); 635 col--; 636 #else 637 col = aij->colmap[idxn[j]] - 1; 638 #endif 639 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 640 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 641 } 642 } 643 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 644 } 645 PetscFunctionReturn(0); 646 } 647 648 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) { 649 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 650 PetscInt nstash, reallocs; 651 652 PetscFunctionBegin; 653 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 654 655 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 656 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 657 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 658 PetscFunctionReturn(0); 659 } 660 661 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType 
mode) { 662 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 663 PetscMPIInt n; 664 PetscInt i, j, rstart, ncols, flg; 665 PetscInt *row, *col; 666 PetscBool other_disassembled; 667 PetscScalar *val; 668 669 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 670 671 PetscFunctionBegin; 672 if (!aij->donotstash && !mat->nooffprocentries) { 673 while (1) { 674 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 675 if (!flg) break; 676 677 for (i = 0; i < n;) { 678 /* Now identify the consecutive vals belonging to the same row */ 679 for (j = i, rstart = row[j]; j < n; j++) { 680 if (row[j] != rstart) break; 681 } 682 if (j < n) ncols = j - i; 683 else ncols = n - i; 684 /* Now assemble all these values with a single function call */ 685 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 686 i = j; 687 } 688 } 689 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 690 } 691 #if defined(PETSC_HAVE_DEVICE) 692 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 693 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 694 if (mat->boundtocpu) { 695 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 696 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 697 } 698 #endif 699 PetscCall(MatAssemblyBegin(aij->A, mode)); 700 PetscCall(MatAssemblyEnd(aij->A, mode)); 701 702 /* determine if any processor has disassembled, if so we must 703 also disassemble ourself, in order that we may reassemble. 
*/ 704 /* 705 if nonzero structure of submatrix B cannot change then we know that 706 no processor disassembled thus we can skip this stuff 707 */ 708 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 709 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 710 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 711 PetscCall(MatDisAssemble_MPIAIJ(mat)); 712 } 713 } 714 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 715 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 716 #if defined(PETSC_HAVE_DEVICE) 717 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 718 #endif 719 PetscCall(MatAssemblyBegin(aij->B, mode)); 720 PetscCall(MatAssemblyEnd(aij->B, mode)); 721 722 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 723 724 aij->rowvalues = NULL; 725 726 PetscCall(VecDestroy(&aij->diag)); 727 728 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 729 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 730 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 731 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 732 } 733 #if defined(PETSC_HAVE_DEVICE) 734 mat->offloadmask = PETSC_OFFLOAD_BOTH; 735 #endif 736 PetscFunctionReturn(0); 737 } 738 739 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) { 740 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 741 742 PetscFunctionBegin; 743 PetscCall(MatZeroEntries(l->A)); 744 PetscCall(MatZeroEntries(l->B)); 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 
{ /* NOTE(review): body of MatZeroRows_MPIAIJ(); the signature line precedes this chunk — confirm against full file */
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed: b[i] = diag*x[i] for each locally zeroed row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* snapshot nonzero states so we can detect pattern changes after the zeroing below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: diagonal entry lives in the diagonal block, let SeqAIJ place it */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the "no new nonzeros" flags; temporarily cleared below so MatSetValues() may allocate */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entries one by one through the parallel MatSetValues() path */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* row has no corresponding column in a rectangular matrix */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the saved nonew flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the global state only if some rank changed its local pattern */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/* Zero the given global rows AND columns, placing diag on the zeroed diagonal entries;
   optionally fixes the right hand side b using the prescribed solution x. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask of zeroed columns and scatter it to the ghosted layout */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* column was zeroed somewhere: drop entry, fix rhs */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* y = A*x: overlap the ghost-value scatter with the diagonal-block multiply */
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(0);
}

/* Apply only the diagonal block's diagonal-block multiply (delegates to the SeqAIJ part) */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, with the same scatter/compute overlap as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transposes first, then reverse-scatter adds the off-process contributions */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals Amat^T (within tol); collective result returned in f */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) {
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* "notme" = all global columns outside this rank's owned row range */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

/* A is symmetric iff A equals its own transpose */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) {
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(0);
}

/* zz = yy + A^T*xx */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

/* A = aa*A, applied to both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy all sub-objects and detach every composed function/type-conversion hook */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here (also above); harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) { 1169 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1170 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1171 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1172 const PetscInt *garray = aij->garray; 1173 const PetscScalar *aa, *ba; 1174 PetscInt header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb; 1175 PetscInt *rowlens; 1176 PetscInt *colidxs; 1177 PetscScalar *matvals; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 header[2] = N; 1193 header[3] = nz; 1194 PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1195 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1196 1197 /* fill in and store row lengths */ 1198 PetscCall(PetscMalloc1(m, &rowlens)); 1199 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1200 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1201 PetscCall(PetscFree(rowlens)); 1202 1203 /* fill in and store column indices */ 1204 PetscCall(PetscMalloc1(nz, &colidxs)); 1205 for (cnt = 0, i = 0; i < m; i++) { 1206 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1207 if (garray[B->j[jb]] > cs) break; 1208 colidxs[cnt++] = garray[B->j[jb]]; 1209 } 1210 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1211 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1212 } 1213 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1214 
PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1215 PetscCall(PetscFree(colidxs)); 1216 1217 /* fill in and store nonzero values */ 1218 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1219 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1220 PetscCall(PetscMalloc1(nz, &matvals)); 1221 for (cnt = 0, i = 0; i < m; i++) { 1222 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1223 if (garray[B->j[jb]] > cs) break; 1224 matvals[cnt++] = ba[jb]; 1225 } 1226 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1227 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1228 } 1229 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1230 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1231 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1232 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1233 PetscCall(PetscFree(matvals)); 1234 1235 /* write block size option to the viewer's .info file */ 1236 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #include <petscdraw.h> 1241 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 1242 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1243 PetscMPIInt rank = aij->rank, size = aij->size; 1244 PetscBool isdraw, iascii, isbinary; 1245 PetscViewer sviewer; 1246 PetscViewerFormat format; 1247 1248 PetscFunctionBegin; 1249 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1250 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1251 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1252 if (iascii) { 1253 PetscCall(PetscViewerGetFormat(viewer, &format)); 1254 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1255 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, 
*nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1256 PetscCall(PetscMalloc1(size, &nz)); 1257 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1258 for (i = 0; i < (PetscInt)size; i++) { 1259 nmax = PetscMax(nmax, nz[i]); 1260 nmin = PetscMin(nmin, nz[i]); 1261 navg += nz[i]; 1262 } 1263 PetscCall(PetscFree(nz)); 1264 navg = navg / size; 1265 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1266 PetscFunctionReturn(0); 1267 } 1268 PetscCall(PetscViewerGetFormat(viewer, &format)); 1269 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1270 MatInfo info; 1271 PetscInt *inodes = NULL; 1272 1273 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1274 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1275 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1276 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1277 if (!inodes) { 1278 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1279 (double)info.memory)); 1280 } else { 1281 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1282 (double)info.memory)); 1283 } 1284 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1285 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1286 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1287 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT 
" \n", rank, (PetscInt)info.nz_used)); 1288 PetscCall(PetscViewerFlush(viewer)); 1289 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1290 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1291 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1292 PetscFunctionReturn(0); 1293 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1294 PetscInt inodecount, inodelimit, *inodes; 1295 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1296 if (inodes) { 1297 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1298 } else { 1299 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1300 } 1301 PetscFunctionReturn(0); 1302 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1303 PetscFunctionReturn(0); 1304 } 1305 } else if (isbinary) { 1306 if (size == 1) { 1307 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1308 PetscCall(MatView(aij->A, viewer)); 1309 } else { 1310 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1311 } 1312 PetscFunctionReturn(0); 1313 } else if (iascii && size == 1) { 1314 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1315 PetscCall(MatView(aij->A, viewer)); 1316 PetscFunctionReturn(0); 1317 } else if (isdraw) { 1318 PetscDraw draw; 1319 PetscBool isnull; 1320 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1321 PetscCall(PetscDrawIsNull(draw, &isnull)); 1322 if (isnull) PetscFunctionReturn(0); 1323 } 1324 1325 { /* assemble the entire matrix onto first processor */ 1326 Mat A = NULL, Av; 1327 IS isrow, iscol; 1328 1329 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1330 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1331 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1332 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1333 /* The commented code uses MatCreateSubMatrices instead */ 1334 /* 1335 Mat *AA, A = NULL, Av; 1336 IS isrow,iscol; 1337 1338 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1339 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1340 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1341 if (rank == 0) { 1342 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1343 A = AA[0]; 1344 Av = AA[0]; 1345 } 1346 PetscCall(MatDestroySubMatrices(1,&AA)); 1347 */ 1348 PetscCall(ISDestroy(&iscol)); 1349 PetscCall(ISDestroy(&isrow)); 1350 /* 1351 Everyone has to call to draw the matrix since the graphics waits are 1352 synchronized across all processors that share the PetscDraw object 1353 */ 1354 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1355 if (rank == 0) { 1356 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1357 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1358 } 1359 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 PetscCall(PetscViewerFlush(viewer)); 1361 PetscCall(MatDestroy(&A)); 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) { 1367 PetscBool iascii, isdraw, issocket, isbinary; 1368 1369 PetscFunctionBegin; 1370 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1371 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1372 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1373 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1374 if (iascii || isdraw || isbinary 
|| issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 1379 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1380 Vec bb1 = NULL; 1381 PetscBool hasop; 1382 1383 PetscFunctionBegin; 1384 if (flag == SOR_APPLY_UPPER) { 1385 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1386 PetscFunctionReturn(0); 1387 } 1388 1389 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1390 1391 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1392 if (flag & SOR_ZERO_INITIAL_GUESS) { 1393 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1394 its--; 1395 } 1396 1397 while (its--) { 1398 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1399 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1400 1401 /* update rhs: bb1 = bb - B*x */ 1402 PetscCall(VecScale(mat->lvec, -1.0)); 1403 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1404 1405 /* local sweep */ 1406 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1407 } 1408 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1409 if (flag & SOR_ZERO_INITIAL_GUESS) { 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1411 its--; 1412 } 1413 while (its--) { 1414 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1415 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1416 1417 /* update rhs: bb1 = bb - B*x */ 1418 PetscCall(VecScale(mat->lvec, -1.0)); 1419 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1420 1421 /* local sweep */ 1422 PetscCall((*mat->A->ops->sor)(mat->A, 
bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1423 } 1424 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1425 if (flag & SOR_ZERO_INITIAL_GUESS) { 1426 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1427 its--; 1428 } 1429 while (its--) { 1430 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1431 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1432 1433 /* update rhs: bb1 = bb - B*x */ 1434 PetscCall(VecScale(mat->lvec, -1.0)); 1435 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1436 1437 /* local sweep */ 1438 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1439 } 1440 } else if (flag & SOR_EISENSTAT) { 1441 Vec xx1; 1442 1443 PetscCall(VecDuplicate(bb, &xx1)); 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1445 1446 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1447 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1448 if (!mat->diag) { 1449 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1450 PetscCall(MatGetDiagonal(matin, mat->diag)); 1451 } 1452 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1453 if (hasop) { 1454 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1455 } else { 1456 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1457 } 1458 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1459 1460 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1461 1462 /* local sweep */ 1463 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1464 PetscCall(VecAXPY(xx, 1.0, xx1)); 1465 PetscCall(VecDestroy(&xx1)); 1466 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 
1467 1468 PetscCall(VecDestroy(&bb1)); 1469 1470 matin->factorerrortype = mat->A->factorerrortype; 1471 PetscFunctionReturn(0); 1472 } 1473 1474 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) { 1475 Mat aA, aB, Aperm; 1476 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1477 PetscScalar *aa, *ba; 1478 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1479 PetscSF rowsf, sf; 1480 IS parcolp = NULL; 1481 PetscBool done; 1482 1483 PetscFunctionBegin; 1484 PetscCall(MatGetLocalSize(A, &m, &n)); 1485 PetscCall(ISGetIndices(rowp, &rwant)); 1486 PetscCall(ISGetIndices(colp, &cwant)); 1487 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1488 1489 /* Invert row permutation to find out where my rows should go */ 1490 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1491 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1492 PetscCall(PetscSFSetFromOptions(rowsf)); 1493 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1494 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1495 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1496 1497 /* Invert column permutation to find out where my columns should go */ 1498 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1499 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1500 PetscCall(PetscSFSetFromOptions(sf)); 1501 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1502 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1503 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1504 PetscCall(PetscSFDestroy(&sf)); 1505 1506 PetscCall(ISRestoreIndices(rowp, &rwant)); 1507 PetscCall(ISRestoreIndices(colp, &cwant)); 1508 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1509 1510 /* Find out where my gcols should go */ 1511 
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count permuted diagonal/off-diagonal nonzeros per (old) local row ... */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ... then ship the counts to the ranks that own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Return the number of ghost (off-process) columns and, optionally, their global indices */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/* Gather MatInfo statistics from the diagonal and off-diagonal blocks;
   flag selects local values or a global max/sum reduction */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));
isend[0] = info->nz_used; 1598 isend[1] = info->nz_allocated; 1599 isend[2] = info->nz_unneeded; 1600 isend[3] = info->memory; 1601 isend[4] = info->mallocs; 1602 1603 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1604 1605 isend[0] += info->nz_used; 1606 isend[1] += info->nz_allocated; 1607 isend[2] += info->nz_unneeded; 1608 isend[3] += info->memory; 1609 isend[4] += info->mallocs; 1610 if (flag == MAT_LOCAL) { 1611 info->nz_used = isend[0]; 1612 info->nz_allocated = isend[1]; 1613 info->nz_unneeded = isend[2]; 1614 info->memory = isend[3]; 1615 info->mallocs = isend[4]; 1616 } else if (flag == MAT_GLOBAL_MAX) { 1617 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1618 1619 info->nz_used = irecv[0]; 1620 info->nz_allocated = irecv[1]; 1621 info->nz_unneeded = irecv[2]; 1622 info->memory = irecv[3]; 1623 info->mallocs = irecv[4]; 1624 } else if (flag == MAT_GLOBAL_SUM) { 1625 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1626 1627 info->nz_used = irecv[0]; 1628 info->nz_allocated = irecv[1]; 1629 info->nz_unneeded = irecv[2]; 1630 info->memory = irecv[3]; 1631 info->mallocs = irecv[4]; 1632 } 1633 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1634 info->fill_ratio_needed = 0; 1635 info->factor_mallocs = 0; 1636 PetscFunctionReturn(0); 1637 } 1638 1639 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) { 1640 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1641 1642 PetscFunctionBegin; 1643 switch (op) { 1644 case MAT_NEW_NONZERO_LOCATIONS: 1645 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1646 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1647 case MAT_KEEP_NONZERO_PATTERN: 1648 case MAT_NEW_NONZERO_LOCATION_ERR: 1649 case MAT_USE_INODES: 1650 case MAT_IGNORE_ZERO_ENTRIES: 1651 case MAT_FORM_EXPLICIT_TRANSPOSE: 1652 MatCheckPreallocated(A, 1); 1653 PetscCall(MatSetOption(a->A, op, flg)); 1654 PetscCall(MatSetOption(a->B, op, 
flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS: A->submat_singleis = flg; break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/* Return one locally owned row (global row index) of the parallel matrix, merging the
   diagonal (A) and off-diagonal (B) parts into globally increasing column order.
   The returned idx/v arrays point at matrix-owned work buffers sized for the longest
   local row; the caller must pair this with MatRestoreRow(). Only rows in
   [rmap->rstart, rmap->rend) may be requested. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request only the pieces (columns/values) that the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL; /* B columns are still needed to merge values into global order */
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow_MPIAIJ(): clears the "row active" flag; the work buffers are
   retained on the matrix for reuse by the next MatGetRow(). */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute a norm of the parallel matrix. Supports NORM_FROBENIUS, NORM_1 (max column sum)
   and NORM_INFINITY (max row sum); NORM_2 is not supported. Single-rank matrices defer
   entirely to the sequential block. All ranks must call (collective reductions). */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt i, j, cstart = mat->cmap->rstart;
  PetscReal sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then reduce and take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column; note tmp is of global size N (not scalable for huge N) */
      PetscReal *tmp, *tmp2;
      PetscInt *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are wholly local, so only one reduction (MPI_MAX) is needed at the end */
      for (j = 0; j < aij->A->rmap->n; j++) {
        v = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}

/* Form the explicit transpose of an MPIAIJ matrix. For MAT_INITIAL_MATRIX (or in-place reuse)
   the result is preallocated exactly by counting column occurrences of A and B and reducing
   the off-diagonal counts onto the owning ranks via a PetscSF. The diagonal block is transposed
   locally; the off-diagonal block is transposed through MatSetValues (communicating). */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt *ai, *aj, *bi, *bj, *B_diag_i;
  Mat B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b = (Mat_MPIAIJ *)B->data;
  A_diag = a->A;
  B_diag = &b->A;
  sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* each source row becomes one global column of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B)); /* MAT_INPLACE_MATRIX: replace A's guts with B's */
  }
  PetscFunctionReturn(0);
}

/* Scale the matrix as diag(ll) * A * diag(rr). The left vector applies row-wise (local rows),
   the right vector column-wise; the ghost portion of rr is scattered to lvec while the
   diagonal-block scaling proceeds, overlapping communication with computation. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat a = aij->A, b = aij->B;
  PetscInt s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}

/* Mark the matrix as not factored; only the diagonal block carries factorization state here. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Test two parallel matrices for equality: compare the local diagonal blocks, then (only if
   those match) the off-diagonal blocks, and combine the per-rank results with a logical AND
   across the communicator. Collective. */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) {
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat a, b, c, d;
  PetscBool flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy A into B. The fast path copies block-by-block and is valid only when the nonzero
   patterns match and both matrices share the same copy implementation. */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default parameters when the user has not done so. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A) {
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
/* Merge-count of the union of two sorted index sets per row: xi/xj and yi/yj are CSR
   row pointers/columns, xltog/yltog map local column indices to global ones so the two
   (possibly differently compressed) column spaces can be compared. nnz[i] receives
   |cols(X_i) U cols(Y_i)| for each of the m rows. */
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) {
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx = xi[i + 1] - xi[i];
    nzy = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) { /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
  PetscInt m = Y->rmap->N; /* Y is a sequential block here, so N equals the local row count */
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/* Y = a*X + Y. Fast path when the patterns are identical; SUBSET uses the basic kernel;
   otherwise a new matrix with the union pattern is preallocated and Y's header is merged
   into it afterwards. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2079 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2080 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2081 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2082 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2083 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2084 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2085 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2086 PetscCall(MatHeaderMerge(Y, &B)); 2087 PetscCall(PetscFree(nnz_d)); 2088 PetscCall(PetscFree(nnz_o)); 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2094 2095 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) { 2096 PetscFunctionBegin; 2097 if (PetscDefined(USE_COMPLEX)) { 2098 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2099 2100 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2101 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2102 } 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatRealPart_MPIAIJ(Mat A) { 2107 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2108 2109 PetscFunctionBegin; 2110 PetscCall(MatRealPart(a->A)); 2111 PetscCall(MatRealPart(a->B)); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) { 2116 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2117 2118 PetscFunctionBegin; 2119 PetscCall(MatImaginaryPart(a->A)); 2120 PetscCall(MatImaginaryPart(a->B)); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) { 2125 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2126 PetscInt i, *idxb = NULL, m = A->rmap->n; 2127 PetscScalar *va, *vv; 2128 Vec vB, vA; 2129 const PetscScalar *vb; 2130 2131 PetscFunctionBegin; 2132 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2133 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2134 2135 PetscCall(VecGetArrayWrite(vA, &va)); 2136 if (idx) { 2137 for (i = 0; i 
< m; i++) { 2138 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2139 } 2140 } 2141 2142 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2143 PetscCall(PetscMalloc1(m, &idxb)); 2144 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2145 2146 PetscCall(VecGetArrayWrite(v, &vv)); 2147 PetscCall(VecGetArrayRead(vB, &vb)); 2148 for (i = 0; i < m; i++) { 2149 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2150 vv[i] = vb[i]; 2151 if (idx) idx[i] = a->garray[idxb[i]]; 2152 } else { 2153 vv[i] = va[i]; 2154 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2155 } 2156 } 2157 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2158 PetscCall(VecRestoreArrayWrite(vA, &va)); 2159 PetscCall(VecRestoreArrayRead(vB, &vb)); 2160 PetscCall(PetscFree(idxb)); 2161 PetscCall(VecDestroy(&vA)); 2162 PetscCall(VecDestroy(&vB)); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) { 2167 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2168 PetscInt m = A->rmap->n, n = A->cmap->n; 2169 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2170 PetscInt *cmap = mat->garray; 2171 PetscInt *diagIdx, *offdiagIdx; 2172 Vec diagV, offdiagV; 2173 PetscScalar *a, *diagA, *offdiagA; 2174 const PetscScalar *ba, *bav; 2175 PetscInt r, j, col, ncols, *bi, *bj; 2176 Mat B = mat->B; 2177 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2178 2179 PetscFunctionBegin; 2180 /* When a process holds entire A and other processes have no entry */ 2181 if (A->cmap->N == n) { 2182 PetscCall(VecGetArrayWrite(v, &diagA)); 2183 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2184 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2185 PetscCall(VecDestroy(&diagV)); 2186 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2187 PetscFunctionReturn(0); 2188 } else if (n == 0) { 2189 if (m) { 2190 PetscCall(VecGetArrayWrite(v, &a)); 2191 for (r = 0; r < m; r++) { 2192 a[r] = 
0.0; 2193 if (idx) idx[r] = -1; 2194 } 2195 PetscCall(VecRestoreArrayWrite(v, &a)); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2201 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2202 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2203 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2204 2205 /* Get offdiagIdx[] for implicit 0.0 */ 2206 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2207 ba = bav; 2208 bi = b->i; 2209 bj = b->j; 2210 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2211 for (r = 0; r < m; r++) { 2212 ncols = bi[r + 1] - bi[r]; 2213 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2214 offdiagA[r] = *ba; 2215 offdiagIdx[r] = cmap[0]; 2216 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2217 offdiagA[r] = 0.0; 2218 2219 /* Find first hole in the cmap */ 2220 for (j = 0; j < ncols; j++) { 2221 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2222 if (col > j && j < cstart) { 2223 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2224 break; 2225 } else if (col > j + n && j >= cstart) { 2226 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2227 break; 2228 } 2229 } 2230 if (j == ncols && ncols < A->cmap->N - n) { 2231 /* a hole is outside compressed Bcols */ 2232 if (ncols == 0) { 2233 if (cstart) { 2234 offdiagIdx[r] = 0; 2235 } else offdiagIdx[r] = cend; 2236 } else { /* ncols > 0 */ 2237 offdiagIdx[r] = cmap[ncols - 1] + 1; 2238 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2239 } 2240 } 2241 } 2242 2243 for (j = 0; j < ncols; j++) { 2244 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2245 offdiagA[r] = *ba; 2246 offdiagIdx[r] = cmap[*bj]; 2247 } 2248 ba++; 2249 bj++; 2250 } 2251 } 2252 2253 PetscCall(VecGetArrayWrite(v, &a)); 2254 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2255 for (r = 0; r < m; ++r) { 2256 if (PetscAbsScalar(diagA[r]) < 
PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: prefer the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, find the entry of minimum real part, treating columns absent from the
   compressed off-diagonal block as implicit 0.0; optionally return the global column index.
   Rows with no local columns (n == 0) report PETSC_MAX_REAL and index -1. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt m = A->rmap->n, n = A->cmap->n;
  PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt *cmap = mat->garray;
  PetscInt *diagIdx, *offdiagIdx;
  Vec diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt r, j, col, ncols, *bi, *bj;
  Mat B = mat->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists; start the minimum at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: prefer the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, find the entry of maximum real part, treating columns absent from the
   compressed off-diagonal block as implicit 0.0; optionally return the global column index.
   Rows with no local columns (n == 0) report PETSC_MIN_REAL and index -1. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt m = A->rmap->n, n = A->cmap->n;
  PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt *cmap = mat->garray;
  PetscInt *diagIdx, *offdiagIdx;
  Vec diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt r, j, col, ncols, *bi, *bj;
  Mat B = mat->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: prefer the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* Return a sequential matrix with the same nonzero structure; values are not copied
   (MAT_DO_NOT_GET_VALUES). Ownership of the returned matrix passes to the caller. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) {
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(0);
}

/* Invert the point-block diagonal; only the local diagonal block participates, and any
   factorization error it reports is propagated to the parallel matrix. */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Fill the matrix with random values. For an unassembled matrix, the off-diagonal block must
   skip the locally owned column range so entries land in the correct block; afterwards the
   matrix is assembled. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Type-specific implementation backing MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the
   increaseoverlap function pointer between the scalable and default algorithms. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) {
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* total = nonzeros of the diagonal block plus those of the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective on A

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) {
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if
(flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2603 PetscOptionsHeadEnd(); 2604 PetscFunctionReturn(0); 2605 } 2606 2607 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) { 2608 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2609 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2610 2611 PetscFunctionBegin; 2612 if (!Y->preallocated) { 2613 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2614 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2615 PetscInt nonew = aij->nonew; 2616 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2617 aij->nonew = nonew; 2618 } 2619 PetscCall(MatShift_Basic(Y, a)); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) { 2624 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2625 2626 PetscFunctionBegin; 2627 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2628 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2629 if (d) { 2630 PetscInt rstart; 2631 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2632 *d += rstart; 2633 } 2634 PetscFunctionReturn(0); 2635 } 2636 2637 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) { 2638 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2639 2640 PetscFunctionBegin; 2641 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2642 PetscFunctionReturn(0); 2643 } 2644 2645 /* -------------------------------------------------------------------*/ 2646 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2647 MatGetRow_MPIAIJ, 2648 MatRestoreRow_MPIAIJ, 2649 MatMult_MPIAIJ, 2650 /* 4*/ MatMultAdd_MPIAIJ, 2651 MatMultTranspose_MPIAIJ, 2652 MatMultTransposeAdd_MPIAIJ, 2653 NULL, 2654 NULL, 2655 NULL, 2656 /*10*/ NULL, 2657 NULL, 2658 NULL, 2659 MatSOR_MPIAIJ, 2660 MatTranspose_MPIAIJ, 2661 
/*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                       /*150*/ NULL};

/* ----------------------------------------------------------------------------------------*/

/* Saves a copy of the current numerical values of both sequential blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restores the numerical values previously saved with MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocates the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) sequential blocks.
   Any previously built column map, ghost vector, and scatter context are discarded because
   the off-diagonal structure is about to change. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else 2830 PetscCall(PetscFree(b->colmap)); 2831 #endif 2832 PetscCall(PetscFree(b->garray)); 2833 PetscCall(VecDestroy(&b->lvec)); 2834 PetscCall(VecScatterDestroy(&b->Mvctx)); 2835 2836 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2837 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2838 PetscCall(MatDestroy(&b->B)); 2839 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2840 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2841 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2842 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2843 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B)); 2844 2845 if (!B->preallocated) { 2846 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2847 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2848 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2849 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2850 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A)); 2851 } 2852 2853 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2854 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2855 B->preallocated = PETSC_TRUE; 2856 B->was_assembled = PETSC_FALSE; 2857 B->assembled = PETSC_FALSE; 2858 PetscFunctionReturn(0); 2859 } 2860 2861 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) { 2862 Mat_MPIAIJ *b; 2863 2864 PetscFunctionBegin; 2865 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2866 PetscCall(PetscLayoutSetUp(B->rmap)); 2867 PetscCall(PetscLayoutSetUp(B->cmap)); 2868 b = (Mat_MPIAIJ *)B->data; 2869 2870 #if defined(PETSC_USE_CTABLE) 2871 PetscCall(PetscTableDestroy(&b->colmap)); 2872 #else 2873 PetscCall(PetscFree(b->colmap)); 2874 #endif 2875 PetscCall(PetscFree(b->garray)); 2876 PetscCall(VecDestroy(&b->lvec)); 2877 PetscCall(VecScatterDestroy(&b->Mvctx)); 2878 2879 PetscCall(MatResetPreallocation(b->A)); 2880 PetscCall(MatResetPreallocation(b->B)); 2881 
B->preallocated = PETSC_TRUE; 2882 B->was_assembled = PETSC_FALSE; 2883 B->assembled = PETSC_FALSE; 2884 PetscFunctionReturn(0); 2885 } 2886 2887 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) { 2888 Mat mat; 2889 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2890 2891 PetscFunctionBegin; 2892 *newmat = NULL; 2893 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2894 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2895 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2896 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2897 a = (Mat_MPIAIJ *)mat->data; 2898 2899 mat->factortype = matin->factortype; 2900 mat->assembled = matin->assembled; 2901 mat->insertmode = NOT_SET_VALUES; 2902 mat->preallocated = matin->preallocated; 2903 2904 a->size = oldmat->size; 2905 a->rank = oldmat->rank; 2906 a->donotstash = oldmat->donotstash; 2907 a->roworiented = oldmat->roworiented; 2908 a->rowindices = NULL; 2909 a->rowvalues = NULL; 2910 a->getrowactive = PETSC_FALSE; 2911 2912 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2913 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2914 2915 if (oldmat->colmap) { 2916 #if defined(PETSC_USE_CTABLE) 2917 PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap)); 2918 #else 2919 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2920 PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N) * sizeof(PetscInt))); 2921 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2922 #endif 2923 } else a->colmap = NULL; 2924 if (oldmat->garray) { 2925 PetscInt len; 2926 len = oldmat->B->cmap->n; 2927 PetscCall(PetscMalloc1(len + 1, &a->garray)); 2928 PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt))); 2929 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2930 } else a->garray = NULL; 2931 2932 /* It may happen MatDuplicate is called with a 
non-assembled matrix 2933 In fact, MatDuplicate only requires the matrix to be preallocated 2934 This may happen inside a DMCreateMatrix_Shell */ 2935 if (oldmat->lvec) { 2936 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2937 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec)); 2938 } 2939 if (oldmat->Mvctx) { 2940 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2941 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx)); 2942 } 2943 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2944 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 2945 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2946 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B)); 2947 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2948 *newmat = mat; 2949 PetscFunctionReturn(0); 2950 } 2951 2952 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) { 2953 PetscBool isbinary, ishdf5; 2954 2955 PetscFunctionBegin; 2956 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2957 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2958 /* force binary viewer to load .info file if it has not yet done so */ 2959 PetscCall(PetscViewerSetUp(viewer)); 2960 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2961 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2962 if (isbinary) { 2963 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 2964 } else if (ishdf5) { 2965 #if defined(PETSC_HAVE_HDF5) 2966 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 2967 #else 2968 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2969 #endif 2970 } else { 2971 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, 
((PetscObject)newMat)->type_name); 2972 } 2973 PetscFunctionReturn(0); 2974 } 2975 2976 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) { 2977 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 2978 PetscInt *rowidxs, *colidxs; 2979 PetscScalar *matvals; 2980 2981 PetscFunctionBegin; 2982 PetscCall(PetscViewerSetUp(viewer)); 2983 2984 /* read in matrix header */ 2985 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 2986 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 2987 M = header[1]; 2988 N = header[2]; 2989 nz = header[3]; 2990 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 2991 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 2992 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 2993 2994 /* set block sizes from the viewer's .info file */ 2995 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 2996 /* set global sizes if not set already */ 2997 if (mat->rmap->N < 0) mat->rmap->N = M; 2998 if (mat->cmap->N < 0) mat->cmap->N = N; 2999 PetscCall(PetscLayoutSetUp(mat->rmap)); 3000 PetscCall(PetscLayoutSetUp(mat->cmap)); 3001 3002 /* check if the matrix sizes are correct */ 3003 PetscCall(MatGetSize(mat, &rows, &cols)); 3004 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3005 3006 /* read in row lengths and build row indices */ 3007 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3008 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3009 PetscCall(PetscViewerBinaryReadAll(viewer, 
rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3010 rowidxs[0] = 0; 3011 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3012 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3013 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3014 /* read in column indices and matrix values */ 3015 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3016 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3017 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3018 /* store matrix indices and values */ 3019 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3020 PetscCall(PetscFree(rowidxs)); 3021 PetscCall(PetscFree2(colidxs, matvals)); 3022 PetscFunctionReturn(0); 3023 } 3024 3025 /* Not scalable because of ISAllGather() unless getting all columns. 
*/
/* Produces a sequential IS equivalent to the parallel iscol: either an identity stride
   (when every process selects exactly its own column range) or an allgathered copy. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) {
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* local stride matches this process's owned column range exactly */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all processes must agree before taking the all-columns shortcut */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local columns of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameter:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) {
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* prefix scan gives the global offset of this process's selected columns */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* convert to local row indices */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    /* entries > -1 were written above, i.e. this ghost column was selected by iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol!
*/

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and frees with PetscFree() */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    /* the index sets were composed onto the submatrix during the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      /* both subgarray and garray are sorted; walk them in lock-step to match columns */
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level MATMPIAIJ submatrix extraction: dispatches to a specialized path when the
   row (and possibly column) index sets share the matrix's processor distribution,
   otherwise falls back to the non-scalable allgather-based code. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects record which path created the reused submatrix */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol
has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all processes must agree on which specialized path (if any) to take */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    /* iscol_local may already have been built above by the unsorted SameRowDist path */
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep iscol_local attached to the submatrix for future MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. A - "diagonal" portion of matrix
. B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of B columns

  Output Parameter:
. mat - the matrix, with input A as its local diagonal matrix
  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) {
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  /* A and B must have the same number of local rows */
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local "diagonal" column counts */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* mark preallocated so the assembly below does not try to preallocate again */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's compact local column indices to global indices via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer array ownership from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) {
  /* Extract mat[isrow, iscol] when isrow matches mat's row ownership layout.
     Requires iscol_local (the sequential, gathered form of iscol) to be sorted;
     it may contain duplicate indices. Helper objects "SubMatrix", "SubIScol"
     and "Subcmap" are composed on *newmat so MAT_REUSE_MATRIX calls can skip
     the setup below. */
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the helper objects saved by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: merge-scan the sorted garray against the sorted iscol_local */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* iscol_sub: the requested columns that exist on this rank (diag or off-diag) */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      /* iscmap: maps each kept column back to its position (= column of the submatrix) */
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* remap Msub's column indices to the new global column numbering */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix saved on *newmat by a prior call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert Mreuse's rows into the parallel matrix, one CSR row at a time */
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/* Implementation of MatMPIAIJSetPreallocationCSR(): preallocates from local
   CSR arrays (Ii, J), inserts the values, assembles, and records per-row
   counts of entries left of the diagonal block in Aij->ld for later use by
   MatUpdateMPIAIJWithArrays(). */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* debug-only validation: row counts nonnegative, column indices in range
       (assumes each row's indices are sorted so first/last suffice to check) */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count per-row nonzeros falling in the diagonal block [cstart,cend) vs off-diagonal */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* insert the CSR values row by row; v may be NULL (structure only) */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* all entries are local, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* ld[i] = number of entries in row i with column < cstart (assumes sorted row) */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering..
  i.e for the following matrix, the input data expected is
  as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (no-op if not registered) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
  storage. The stored row and column indices begin with zero.
  See Users-Manual: ch_mat for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the max nonzerors per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation (no-op if not registered) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices
- a - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering.. i.e for the following matrix, the input data expected is
  as shown

  Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* preallocation + value insertion + assembly all happen here */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
Only the numerical values are updated; the other arrays
"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4159 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4160 4161 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4162 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4163 4164 for (i = 0; i < m; i++) { 4165 nnz = Ii[i + 1] - Ii[i]; 4166 Iii = Ii[i]; 4167 ldi = ld[i]; 4168 md = Adi[i + 1] - Adi[i]; 4169 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4170 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4171 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4172 ad += md; 4173 ao += nnz - md; 4174 } 4175 nooffprocentries = mat->nooffprocentries; 4176 mat->nooffprocentries = PETSC_TRUE; 4177 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4178 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4179 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4180 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4181 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4182 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4183 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4184 mat->nooffprocentries = nooffprocentries; 4185 PetscFunctionReturn(0); 4186 } 4187 4188 /*@ 4189 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4190 4191 Collective 4192 4193 Input Parameters: 4194 + mat - the matrix 4195 - v - matrix values, stored by row 4196 4197 Level: intermediate 4198 4199 Note: 4200 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4201 4202 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4203 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4204 @*/ 4205 PetscErrorCode 
MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) { 4206 PetscInt nnz, i, m; 4207 PetscBool nooffprocentries; 4208 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4209 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4210 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4211 PetscScalar *ad, *ao; 4212 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4213 PetscInt ldi, Iii, md; 4214 PetscInt *ld = Aij->ld; 4215 4216 PetscFunctionBegin; 4217 m = mat->rmap->n; 4218 4219 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4220 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4221 Iii = 0; 4222 for (i = 0; i < m; i++) { 4223 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4224 ldi = ld[i]; 4225 md = Adi[i + 1] - Adi[i]; 4226 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4227 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4228 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4229 ad += md; 4230 ao += nnz - md; 4231 Iii += nnz; 4232 } 4233 nooffprocentries = mat->nooffprocentries; 4234 mat->nooffprocentries = PETSC_TRUE; 4235 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4236 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4237 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4238 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4239 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4240 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4241 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4242 mat->nooffprocentries = nooffprocentries; 4243 PetscFunctionReturn(0); 4244 } 4245 4246 /*@C 4247 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4248 (the default parallel PETSc format). For good matrix assembly performance 4249 the user should preallocate the matrix storage by setting the parameters 4250 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4251 performance can be increased by more than a factor of 50. 
4252 4253 Collective 4254 4255 Input Parameters: 4256 + comm - MPI communicator 4257 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4258 This value should be the same as the local size used in creating the 4259 y vector for the matrix-vector product y = Ax. 4260 . n - This value should be the same as the local size used in creating the 4261 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4262 calculated if N is given) For square matrices n is almost always m. 4263 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4264 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4265 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4266 (same value is used for all local rows) 4267 . d_nnz - array containing the number of nonzeros in the various rows of the 4268 DIAGONAL portion of the local submatrix (possibly different for each row) 4269 or NULL, if d_nz is used to specify the nonzero structure. 4270 The size of this array is equal to the number of local rows, i.e 'm'. 4271 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4272 submatrix (same value is used for all local rows). 4273 - o_nnz - array containing the number of nonzeros in the various rows of the 4274 OFF-DIAGONAL portion of the local submatrix (possibly different for 4275 each row) or NULL, if o_nz is used to specify the nonzero 4276 structure. The size of this array is equal to the number 4277 of local rows, i.e 'm'. 4278 4279 Output Parameter: 4280 . A - the matrix 4281 4282 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4283 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4284 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4285 4286 Notes: 4287 If the *_nnz parameter is given then the *_nz parameter is ignored 4288 4289 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4290 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4291 storage requirements for this matrix. 4292 4293 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4294 processor than it must be used on all processors that share the object for 4295 that argument. 4296 4297 The user MUST specify either the local or global matrix dimensions 4298 (possibly both). 4299 4300 The parallel matrix is partitioned across processors such that the 4301 first m0 rows belong to process 0, the next m1 rows belong to 4302 process 1, the next m2 rows belong to process 2 etc.. where 4303 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4304 values corresponding to [m x N] submatrix. 4305 4306 The columns are logically partitioned with the n0 columns belonging 4307 to 0th partition, the next n1 columns belonging to the next 4308 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4309 4310 The DIAGONAL portion of the local submatrix on any given processor 4311 is the submatrix corresponding to the rows and columns m,n 4312 corresponding to the given processor. i.e diagonal matrix on 4313 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4314 etc. The remaining portion of the local submatrix [m x (N-n)] 4315 constitute the OFF-DIAGONAL portion. The example below better 4316 illustrates this concept. 4317 4318 For a square global matrix we define each processor's diagonal portion 4319 to be its local rows and the corresponding columns (a square submatrix); 4320 each processor's off-diagonal portion encompasses the remainder of the 4321 local matrix (a rectangular submatrix). 
4322 4323 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4324 4325 When calling this routine with a single process communicator, a matrix of 4326 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4327 type of communicator, use the construction mechanism 4328 .vb 4329 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4330 .ve 4331 4332 $ MatCreate(...,&A); 4333 $ MatSetType(A,MATMPIAIJ); 4334 $ MatSetSizes(A, m,n,M,N); 4335 $ MatMPIAIJSetPreallocation(A,...); 4336 4337 By default, this format uses inodes (identical nodes) when possible. 4338 We search for consecutive rows with the same nonzero structure, thereby 4339 reusing matrix information to achieve increased efficiency. 4340 4341 Options Database Keys: 4342 + -mat_no_inode - Do not use inodes 4343 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4344 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4345 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4346 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4347 4348 Example usage: 4349 4350 Consider the following 8x8 matrix with 34 non-zero values, that is 4351 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4352 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4353 as follows 4354 4355 .vb 4356 1 2 0 | 0 3 0 | 0 4 4357 Proc0 0 5 6 | 7 0 0 | 8 0 4358 9 0 10 | 11 0 0 | 12 0 4359 ------------------------------------- 4360 13 0 14 | 15 16 17 | 0 0 4361 Proc1 0 18 0 | 19 20 21 | 0 0 4362 0 0 0 | 22 23 0 | 24 0 4363 ------------------------------------- 4364 Proc2 25 26 27 | 0 0 28 | 29 0 4365 30 0 0 | 31 32 33 | 0 34 4366 .ve 4367 4368 This can be represented as a collection of submatrices as 4369 4370 .vb 4371 A B C 4372 D E F 4373 G H I 4374 .ve 4375 4376 Where the submatrices A,B,C are owned by proc0, D,E,F are 4377 owned by proc1, G,H,I are owned by proc2. 4378 4379 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4380 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4381 The 'M','N' parameters are 8,8, and have the same values on all procs. 4382 4383 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4384 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4385 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4386 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4387 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4388 matrix, ans [DF] as another SeqAIJ matrix. 4389 4390 When d_nz, o_nz parameters are specified, d_nz storage elements are 4391 allocated for every row of the local diagonal submatrix, and o_nz 4392 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4393 One way to choose d_nz and o_nz is to use the max nonzerors per local 4394 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4395 In this case, the values of d_nz,o_nz are 4396 .vb 4397 proc0 : dnz = 2, o_nz = 2 4398 proc1 : dnz = 3, o_nz = 2 4399 proc2 : dnz = 1, o_nz = 4 4400 .ve 4401 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4402 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4403 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 4404 34 values. 4405 4406 When d_nnz, o_nnz parameters are specified, the storage is specified 4407 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4408 In the above case the values for d_nnz,o_nnz are 4409 .vb 4410 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4411 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4412 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4413 .ve 4414 Here the space allocated is sum of all the above values i.e 34, and 4415 hence pre-allocation is perfect. 4416 4417 Level: intermediate 4418 4419 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4420 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4421 @*/ 4422 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) { 4423 PetscMPIInt size; 4424 4425 PetscFunctionBegin; 4426 PetscCall(MatCreate(comm, A)); 4427 PetscCall(MatSetSizes(*A, m, n, M, N)); 4428 PetscCallMPI(MPI_Comm_size(comm, &size)); 4429 if (size > 1) { 4430 PetscCall(MatSetType(*A, MATMPIAIJ)); 4431 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4432 } else { 4433 PetscCall(MatSetType(*A, MATSEQAIJ)); 4434 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4435 } 4436 PetscFunctionReturn(0); 4437 } 4438 4439 /*@C 4440 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4441 4442 Not collective 4443 4444 Input Parameter: 4445 . A - The `MATMPIAIJ` matrix 4446 4447 Output Parameters: 4448 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4449 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4450 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4451 4452 Note: 4453 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. 
The columns 4454 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4455 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4456 local column numbers to global column numbers in the original matrix. 4457 4458 Level: intermediate 4459 4460 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4461 @*/ 4462 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) { 4463 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4464 PetscBool flg; 4465 4466 PetscFunctionBegin; 4467 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4468 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4469 if (Ad) *Ad = a->A; 4470 if (Ao) *Ao = a->B; 4471 if (colmap) *colmap = a->garray; 4472 PetscFunctionReturn(0); 4473 } 4474 4475 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 4476 PetscInt m, N, i, rstart, nnz, Ii; 4477 PetscInt *indx; 4478 PetscScalar *values; 4479 MatType rootType; 4480 4481 PetscFunctionBegin; 4482 PetscCall(MatGetSize(inmat, &m, &N)); 4483 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4484 PetscInt *dnz, *onz, sum, bs, cbs; 4485 4486 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4487 /* Check sum(n) = N */ 4488 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4489 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4490 4491 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4492 rstart -= m; 4493 4494 MatPreallocateBegin(comm, m, n, dnz, onz); 4495 for (i = 0; i < m; i++) { 4496 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4497 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4498 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4499 } 4500 4501 PetscCall(MatCreate(comm, outmat)); 4502 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4503 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4504 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4505 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4506 PetscCall(MatSetType(*outmat, rootType)); 4507 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4508 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4509 MatPreallocateEnd(dnz, onz); 4510 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4511 } 4512 4513 /* numeric phase */ 4514 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4515 for (i = 0; i < m; i++) { 4516 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4517 Ii = i + rstart; 4518 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4519 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4520 } 4521 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4522 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4523 PetscFunctionReturn(0); 4524 } 4525 4526 PetscErrorCode MatFileSplit(Mat A, char *outfile) { 4527 PetscMPIInt rank; 4528 PetscInt m, N, i, rstart, nnz; 4529 size_t len; 4530 const PetscInt *indx; 4531 PetscViewer out; 4532 char *name; 4533 Mat B; 4534 const PetscScalar *values; 4535 4536 PetscFunctionBegin; 4537 PetscCall(MatGetLocalSize(A, &m, NULL)); 4538 PetscCall(MatGetSize(A, NULL, &N)); 4539 /* Should this be the type of the diagonal block of A? 
*/ 4540 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4541 PetscCall(MatSetSizes(B, m, N, m, N)); 4542 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4543 PetscCall(MatSetType(B, MATSEQAIJ)); 4544 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4545 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4546 for (i = 0; i < m; i++) { 4547 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4548 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4549 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4550 } 4551 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4552 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4553 4554 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4555 PetscCall(PetscStrlen(outfile, &len)); 4556 PetscCall(PetscMalloc1(len + 6, &name)); 4557 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4558 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4559 PetscCall(PetscFree(name)); 4560 PetscCall(MatView(B, out)); 4561 PetscCall(PetscViewerDestroy(&out)); 4562 PetscCall(MatDestroy(&B)); 4563 PetscFunctionReturn(0); 4564 } 4565 4566 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) { 4567 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4568 4569 PetscFunctionBegin; 4570 if (!merge) PetscFunctionReturn(0); 4571 PetscCall(PetscFree(merge->id_r)); 4572 PetscCall(PetscFree(merge->len_s)); 4573 PetscCall(PetscFree(merge->len_r)); 4574 PetscCall(PetscFree(merge->bi)); 4575 PetscCall(PetscFree(merge->bj)); 4576 PetscCall(PetscFree(merge->buf_ri[0])); 4577 PetscCall(PetscFree(merge->buf_ri)); 4578 PetscCall(PetscFree(merge->buf_rj[0])); 4579 PetscCall(PetscFree(merge->buf_rj)); 4580 PetscCall(PetscFree(merge->coi)); 4581 PetscCall(PetscFree(merge->coj)); 4582 PetscCall(PetscFree(merge->owners_co)); 4583 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4584 PetscCall(PetscFree(merge)); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 #include 
<../src/mat/utils/freespace.h> 4589 #include <petscbt.h> 4590 4591 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) { 4592 MPI_Comm comm; 4593 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4594 PetscMPIInt size, rank, taga, *len_s; 4595 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4596 PetscInt proc, m; 4597 PetscInt **buf_ri, **buf_rj; 4598 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4599 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4600 MPI_Request *s_waits, *r_waits; 4601 MPI_Status *status; 4602 const MatScalar *aa, *a_a; 4603 MatScalar **abuf_r, *ba_i; 4604 Mat_Merge_SeqsToMPI *merge; 4605 PetscContainer container; 4606 4607 PetscFunctionBegin; 4608 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4609 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4610 4611 PetscCallMPI(MPI_Comm_size(comm, &size)); 4612 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4613 4614 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4615 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4616 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4617 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4618 aa = a_a; 4619 4620 bi = merge->bi; 4621 bj = merge->bj; 4622 buf_ri = merge->buf_ri; 4623 buf_rj = merge->buf_rj; 4624 4625 PetscCall(PetscMalloc1(size, &status)); 4626 owners = merge->rowmap->range; 4627 len_s = merge->len_s; 4628 4629 /* send and recv matrix values */ 4630 /*-----------------------------*/ 4631 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4632 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4633 4634 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4635 for (proc = 0, k = 0; proc < size; proc++) { 4636 if (!len_s[proc]) continue; 4637 i = owners[proc]; 4638 
PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4639 k++; 4640 } 4641 4642 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4643 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4644 PetscCall(PetscFree(status)); 4645 4646 PetscCall(PetscFree(s_waits)); 4647 PetscCall(PetscFree(r_waits)); 4648 4649 /* insert mat values of mpimat */ 4650 /*----------------------------*/ 4651 PetscCall(PetscMalloc1(N, &ba_i)); 4652 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4653 4654 for (k = 0; k < merge->nrecv; k++) { 4655 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4656 nrows = *(buf_ri_k[k]); 4657 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4658 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4659 } 4660 4661 /* set values of ba */ 4662 m = merge->rowmap->n; 4663 for (i = 0; i < m; i++) { 4664 arow = owners[rank] + i; 4665 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4666 bnzi = bi[i + 1] - bi[i]; 4667 PetscCall(PetscArrayzero(ba_i, bnzi)); 4668 4669 /* add local non-zero vals of this proc's seqmat into ba */ 4670 anzi = ai[arow + 1] - ai[arow]; 4671 aj = a->j + ai[arow]; 4672 aa = a_a + ai[arow]; 4673 nextaj = 0; 4674 for (j = 0; nextaj < anzi; j++) { 4675 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4676 ba_i[j] += aa[nextaj++]; 4677 } 4678 } 4679 4680 /* add received vals into ba */ 4681 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4682 /* i-th row */ 4683 if (i == *nextrow[k]) { 4684 anzi = *(nextai[k] + 1) - *nextai[k]; 4685 aj = buf_rj[k] + *(nextai[k]); 4686 aa = abuf_r[k] + *(nextai[k]); 4687 nextaj = 0; 4688 for (j = 0; nextaj < anzi; j++) { 4689 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4690 ba_i[j] += aa[nextaj++]; 4691 } 4692 } 4693 nextrow[k]++; 
4694 nextai[k]++; 4695 } 4696 } 4697 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4698 } 4699 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4700 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4701 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4702 4703 PetscCall(PetscFree(abuf_r[0])); 4704 PetscCall(PetscFree(abuf_r)); 4705 PetscCall(PetscFree(ba_i)); 4706 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4707 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) { 4712 Mat B_mpi; 4713 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4714 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4715 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4716 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4717 PetscInt len, proc, *dnz, *onz, bs, cbs; 4718 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4719 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4720 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4721 MPI_Status *status; 4722 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4723 PetscBT lnkbt; 4724 Mat_Merge_SeqsToMPI *merge; 4725 PetscContainer container; 4726 4727 PetscFunctionBegin; 4728 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4729 4730 /* make sure it is a PETSc comm */ 4731 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4732 PetscCallMPI(MPI_Comm_size(comm, &size)); 4733 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4734 4735 PetscCall(PetscNew(&merge)); 4736 PetscCall(PetscMalloc1(size, &status)); 4737 4738 /* determine row ownership */ 4739 /*---------------------------------------------------------*/ 4740 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4741 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4742 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4743 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4744 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4745 PetscCall(PetscMalloc1(size, &len_si)); 4746 PetscCall(PetscMalloc1(size, &merge->len_s)); 4747 4748 m = merge->rowmap->n; 4749 owners = merge->rowmap->range; 4750 4751 /* determine the number of messages to send, their lengths */ 4752 /*---------------------------------------------------------*/ 4753 len_s = merge->len_s; 4754 4755 len = 0; /* length of buf_si[] */ 4756 merge->nsend = 0; 4757 for (proc = 0; proc < size; proc++) { 4758 len_si[proc] = 0; 4759 if (proc == rank) { 4760 len_s[proc] = 0; 4761 } else { 4762 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4763 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4764 } 4765 if (len_s[proc]) { 4766 merge->nsend++; 4767 nrows = 0; 4768 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4769 if (ai[i + 1] > ai[i]) nrows++; 4770 } 4771 len_si[proc] = 2 * (nrows + 1); 4772 len += len_si[proc]; 4773 } 4774 } 4775 4776 /* determine the number and length of messages to receive for ij-structure */ 4777 /*-------------------------------------------------------------------------*/ 4778 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4779 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4780 4781 /* post the Irecv of j-structure */ 4782 /*-------------------------------*/ 4783 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4784 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4785 4786 /* post the Isend of j-structure */ 4787 /*--------------------------------*/ 4788 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4789 4790 for (proc = 0, k = 0; proc < size; proc++) { 4791 if (!len_s[proc]) continue; 4792 i = owners[proc]; 4793 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4794 k++; 4795 } 4796 4797 /* receives and sends of j-structure are complete */ 4798 /*------------------------------------------------*/ 4799 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4800 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4801 4802 /* send and recv i-structure */ 4803 /*---------------------------*/ 4804 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4805 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4806 4807 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4808 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4809 for (proc = 0, k = 0; proc < size; proc++) { 4810 if (!len_s[proc]) continue; 4811 /* form outgoing message for i-structure: 4812 buf_si[0]: nrows to be sent 4813 [1:nrows]: row index (global) 4814 [nrows+1:2*nrows+1]: i-structure index 4815 */ 4816 /*-------------------------------------------*/ 4817 nrows = len_si[proc] / 2 - 1; 4818 buf_si_i = buf_si + nrows + 1; 4819 buf_si[0] = nrows; 4820 buf_si_i[0] = 0; 4821 nrows = 0; 4822 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4823 anzi = ai[i + 1] - ai[i]; 4824 if (anzi) { 4825 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4826 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4827 nrows++; 4828 } 4829 } 4830 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4831 k++; 4832 buf_si += len_si[proc]; 4833 } 4834 4835 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4836 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4837 4838 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4839 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); 4840 4841 PetscCall(PetscFree(len_si)); 4842 PetscCall(PetscFree(len_ri)); 4843 PetscCall(PetscFree(rj_waits)); 4844 PetscCall(PetscFree2(si_waits, sj_waits)); 4845 PetscCall(PetscFree(ri_waits)); 4846 PetscCall(PetscFree(buf_s)); 4847 PetscCall(PetscFree(status)); 4848 4849 /* compute a local seq matrix in each processor */ 4850 /*----------------------------------------------*/ 4851 /* allocate bi array and free space for accumulating nonzero column info */ 4852 PetscCall(PetscMalloc1(m + 1, &bi)); 4853 bi[0] = 0; 4854 4855 /* create and initialize a linked list */ 4856 nlnk = N + 1; 4857 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4858 4859 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4860 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4861 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4862 4863 current_space = free_space; 4864 4865 /* determine symbolic info for each local row */ 4866 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4867 4868 for (k = 0; k < merge->nrecv; k++) { 4869 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4870 nrows = *buf_ri_k[k]; 4871 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4872 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4873 } 4874 4875 MatPreallocateBegin(comm, m, n, dnz, onz); 4876 len = 0; 4877 for (i = 0; i < m; i++) { 4878 bnzi = 0; 4879 /* add local non-zero cols of this proc's seqmat into lnk */ 4880 arow = owners[rank] + i; 4881 anzi = ai[arow + 1] - ai[arow]; 4882 aj = a->j + ai[arow]; 4883 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4884 bnzi += nlnk; 4885 /* add received col data into lnk */ 4886 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4887 if (i == *nextrow[k]) { /* i-th row */ 4888 anzi = *(nextai[k] + 1) - *nextai[k]; 4889 aj = buf_rj[k] + 
*nextai[k]; 4890 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4891 bnzi += nlnk; 4892 nextrow[k]++; 4893 nextai[k]++; 4894 } 4895 } 4896 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4897 4898 /* if free space is not available, make more free space */ 4899 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4900 /* copy data into free space, then initialize lnk */ 4901 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4902 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4903 4904 current_space->array += bnzi; 4905 current_space->local_used += bnzi; 4906 current_space->local_remaining -= bnzi; 4907 4908 bi[i + 1] = bi[i] + bnzi; 4909 } 4910 4911 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4912 4913 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4914 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4915 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4916 4917 /* create symbolic parallel matrix B_mpi */ 4918 /*---------------------------------------*/ 4919 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4920 PetscCall(MatCreate(comm, &B_mpi)); 4921 if (n == PETSC_DECIDE) { 4922 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4923 } else { 4924 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4925 } 4926 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4927 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4928 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4929 MatPreallocateEnd(dnz, onz); 4930 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4931 4932 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4933 B_mpi->assembled = PETSC_FALSE; 4934 merge->bi = bi; 4935 merge->bj = bj; 4936 merge->buf_ri = buf_ri; 4937 merge->buf_rj = buf_rj; 4938 merge->coi = NULL; 4939 merge->coj = NULL; 4940 merge->owners_co = 
NULL; 4941 4942 PetscCall(PetscCommDestroy(&comm)); 4943 4944 /* attach the supporting struct to B_mpi for reuse */ 4945 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 4946 PetscCall(PetscContainerSetPointer(container, merge)); 4947 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 4948 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 4949 PetscCall(PetscContainerDestroy(&container)); 4950 *mpimat = B_mpi; 4951 4952 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4953 PetscFunctionReturn(0); 4954 } 4955 4956 /*@C 4957 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 4958 matrices from each processor 4959 4960 Collective 4961 4962 Input Parameters: 4963 + comm - the communicators the parallel matrix will live on 4964 . seqmat - the input sequential matrices 4965 . m - number of local rows (or `PETSC_DECIDE`) 4966 . n - number of local columns (or `PETSC_DECIDE`) 4967 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 4968 4969 Output Parameter: 4970 . mpimat - the parallel matrix generated 4971 4972 Level: advanced 4973 4974 Note: 4975 The dimensions of the sequential matrix in each processor MUST be the same. 4976 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4977 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) {
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* Single-process communicator: there is nothing to merge, just duplicate (or copy into) the sequential matrix */
  if (size == 1) {
    PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
    } else {
      PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
  /* The symbolic phase (nonzero structure) is needed only on first use; the numeric phase (re)fills the values */
  if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
  with `MatGetSize()`

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  Destroy the matrix with `MatDestroy()`

.seealso: `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) {
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    /* Already sequential: the matrix itself is the local matrix; take a reference so the caller may destroy it */
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
  with `MatGetSize()`

  Not Collective

  Input Parameters:
+ A - the matrix
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
  If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
  This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
  modify the values of the returned A_loc.

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) {
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  /* Uniprocessor: the diagonal block already is the whole local matrix */
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a = (Mat_SeqAIJ *)(mpimat->A)->data;
  b = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walking copies; aav/bav keep the original pointers for the matching Restore calls below */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row i of the result has all diag entries plus all off-diag entries of row i */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Interleave so the global column indices of each row come out sorted:
       off-diag columns below cstart, then the diag block, then the remaining off-diag columns */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns < cstart) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (global columns >= cstart + diag width) */
      for (j = jo; j < ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure is unchanged; only refill the values, walking in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci = mat->i;
    cj = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a
sequential matrix with 5170 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5171 5172 Not Collective 5173 5174 Input Parameters: 5175 + A - the matrix 5176 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5177 5178 Output Parameters: 5179 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5180 - A_loc - the local sequential matrix generated 5181 5182 Level: developer 5183 5184 Note: 5185 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5186 5187 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5188 @*/ 5189 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) { 5190 Mat Ao, Ad; 5191 const PetscInt *cmap; 5192 PetscMPIInt size; 5193 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5194 5195 PetscFunctionBegin; 5196 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5197 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5198 if (size == 1) { 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 PetscCall(PetscObjectReference((PetscObject)Ad)); 5201 *A_loc = Ad; 5202 } else if (scall == MAT_REUSE_MATRIX) { 5203 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5204 } 5205 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5206 PetscFunctionReturn(0); 5207 } 5208 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5209 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5210 if (f) { 5211 PetscCall((*f)(A, scall, glob, A_loc)); 5212 } else { 5213 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5214 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5215 Mat_SeqAIJ *c; 5216 
PetscInt *ai = a->i, *aj = a->j; 5217 PetscInt *bi = b->i, *bj = b->j; 5218 PetscInt *ci, *cj; 5219 const PetscScalar *aa, *ba; 5220 PetscScalar *ca; 5221 PetscInt i, j, am, dn, on; 5222 5223 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5224 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5225 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5226 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5227 if (scall == MAT_INITIAL_MATRIX) { 5228 PetscInt k; 5229 PetscCall(PetscMalloc1(1 + am, &ci)); 5230 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5231 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5232 ci[0] = 0; 5233 for (i = 0, k = 0; i < am; i++) { 5234 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5235 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5236 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5237 /* diagonal portion of A */ 5238 for (j = 0; j < ncols_d; j++, k++) { 5239 cj[k] = *aj++; 5240 ca[k] = *aa++; 5241 } 5242 /* off-diagonal portion of A */ 5243 for (j = 0; j < ncols_o; j++, k++) { 5244 cj[k] = dn + *bj++; 5245 ca[k] = *ba++; 5246 } 5247 } 5248 /* put together the new matrix */ 5249 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5250 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5251 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5252 c = (Mat_SeqAIJ *)(*A_loc)->data; 5253 c->free_a = PETSC_TRUE; 5254 c->free_ij = PETSC_TRUE; 5255 c->nonew = 0; 5256 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5257 } else if (scall == MAT_REUSE_MATRIX) { 5258 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5259 for (i = 0; i < am; i++) { 5260 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5261 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5262 /* diagonal portion of A */ 5263 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5264 /* off-diagonal portion of A */ 5265 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5266 } 5267 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5268 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5269 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5270 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5271 if (glob) { 5272 PetscInt cst, *gidx; 5273 5274 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5275 PetscCall(PetscMalloc1(dn + on, &gidx)); 5276 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5277 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5278 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5279 } 5280 } 5281 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5282 PetscFunctionReturn(0); 5283 } 5284 5285 /*@C 5286 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5287 5288 Not Collective 5289 5290 Input Parameters: 5291 + A - the matrix 5292 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5293 - row, col - index sets of rows and columns to extract (or NULL) 5294 5295 Output Parameter: 5296 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* Default row IS: all locally owned rows */
    start = A->rmap->rstart;
    end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* Default column IS: the nonzero columns, kept in ascending global order:
       off-diag columns below the owned range, then the owned columns, then the rest of the off-diag columns */
    start = A->cmap->rstart;
    cmap = a->garray;
    nzA = a->A->cmap->n;
    nzB = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* garray is sorted, so the first block is all columns < start */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) {
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL,
PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per root row, record the diag/off-diag nonzero counts and their running offsets (2 ints per row) */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Accumulate totals and the per-leaf-row nonzero counts used to preallocate P_oth */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build two SFs entry-by-entry: one pulling diag entries, one pulling off-diag entries.
     Leaves are positions in P_oth's single (contiguous) a/j arrays. */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (shifted in place, undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Undo the in-place local->global translation on po->j once the broadcast has completed */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (for dof > 1 several off-diag columns map to one block row) */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Hash-map keys come out unordered; sort them to obtain a proper row IS */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A

  Collective on A

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* Columns of A and rows of B must share the same parallel layout for A*B-style products */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS of B = nonzero columns of local A, in ascending global order:
       off-diag columns below the owned range, then the owned columns, then the remaining off-diag columns */
    start = A->cmap->rstart;
    cmap = a->garray;
    nzA = a->A->cmap->n;
    nzB = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller for reuse, or destroy them if the caller did not ask for them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(0);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective on Mat

  Input Parameters:
+ A,B - the matrices in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5693 5694 Level: developer 5695 5696 */ 5697 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) { 5698 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5699 Mat_SeqAIJ *b_oth; 5700 VecScatter ctx; 5701 MPI_Comm comm; 5702 const PetscMPIInt *rprocs, *sprocs; 5703 const PetscInt *srow, *rstarts, *sstarts; 5704 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5705 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5706 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5707 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5708 PetscMPIInt size, tag, rank, nreqs; 5709 5710 PetscFunctionBegin; 5711 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5712 PetscCallMPI(MPI_Comm_size(comm, &size)); 5713 5714 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5715 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5716 } 5717 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5718 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5719 5720 if (size == 1) { 5721 startsj_s = NULL; 5722 bufa_ptr = NULL; 5723 *B_oth = NULL; 5724 PetscFunctionReturn(0); 5725 } 5726 5727 ctx = a->Mvctx; 5728 tag = ((PetscObject)ctx)->tag; 5729 5730 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5731 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5732 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5733 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5734 PetscCall(PetscMalloc1(nreqs, &reqs)); 5735 rwaits = reqs; 5736 swaits = reqs + nrecvs; 5737 5738 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5739 if (scall == MAT_INITIAL_MATRIX) { 5740 /* i-array */ 5741 /*---------*/ 5742 /* post receives */ 5743 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5744 for (i = 0; i < nrecvs; i++) { 5745 rowlen = rvalues + rstarts[i] * rbs; 5746 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5747 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5748 } 5749 5750 /* pack the outgoing message */ 5751 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5752 5753 sstartsj[0] = 0; 5754 rstartsj[0] = 0; 5755 len = 0; /* total length of j or a array to be sent */ 5756 if (nsends) { 5757 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5758 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5759 } 5760 for (i = 0; i < nsends; i++) { 5761 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5762 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5763 for (j = 0; j < nrows; j++) { 5764 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5765 for (l = 0; l < sbs; l++) { 5766 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5767 5768 rowlen[j * sbs + l] = ncols; 5769 5770 len += ncols; 5771 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5772 } 5773 k++; 5774 } 5775 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5776 5777 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5778 } 5779 /* recvs and sends of i-array are completed */ 5780 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5781 
PetscCall(PetscFree(svalues)); 5782 5783 /* allocate buffers for sending j and a arrays */ 5784 PetscCall(PetscMalloc1(len + 1, &bufj)); 5785 PetscCall(PetscMalloc1(len + 1, &bufa)); 5786 5787 /* create i-array of B_oth */ 5788 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5789 5790 b_othi[0] = 0; 5791 len = 0; /* total length of j or a array to be received */ 5792 k = 0; 5793 for (i = 0; i < nrecvs; i++) { 5794 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5795 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5796 for (j = 0; j < nrows; j++) { 5797 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5798 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5799 k++; 5800 } 5801 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5802 } 5803 PetscCall(PetscFree(rvalues)); 5804 5805 /* allocate space for j and a arrays of B_oth */ 5806 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5807 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5808 5809 /* j-array */ 5810 /*---------*/ 5811 /* post receives of j-array */ 5812 for (i = 0; i < nrecvs; i++) { 5813 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5814 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5815 } 5816 5817 /* pack the outgoing message j-array */ 5818 if (nsends) k = sstarts[0]; 5819 for (i = 0; i < nsends; i++) { 5820 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5821 bufJ = bufj + sstartsj[i]; 5822 for (j = 0; j < nrows; j++) { 5823 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5824 for (ll = 0; ll < sbs; ll++) { 5825 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5826 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5827 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5828 } 5829 } 5830 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5831 } 
5832 5833 /* recvs and sends of j-array are completed */ 5834 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5835 } else if (scall == MAT_REUSE_MATRIX) { 5836 sstartsj = *startsj_s; 5837 rstartsj = *startsj_r; 5838 bufa = *bufa_ptr; 5839 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5840 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5841 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5842 5843 /* a-array */ 5844 /*---------*/ 5845 /* post receives of a-array */ 5846 for (i = 0; i < nrecvs; i++) { 5847 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5848 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5849 } 5850 5851 /* pack the outgoing message a-array */ 5852 if (nsends) k = sstarts[0]; 5853 for (i = 0; i < nsends; i++) { 5854 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5855 bufA = bufa + sstartsj[i]; 5856 for (j = 0; j < nrows; j++) { 5857 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5858 for (ll = 0; ll < sbs; ll++) { 5859 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5860 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5861 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5862 } 5863 } 5864 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5865 } 5866 /* recvs and sends of a-array are completed */ 5867 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5868 PetscCall(PetscFree(reqs)); 5869 5870 if (scall == MAT_INITIAL_MATRIX) { 5871 /* put together the new matrix */ 5872 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5873 5874 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5875 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5876 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5877 b_oth->free_a = PETSC_TRUE; 5878 b_oth->free_ij = PETSC_TRUE; 5879 b_oth->nonew = 0; 5880 5881 PetscCall(PetscFree(bufj)); 5882 if (!startsj_s || !bufa_ptr) { 5883 PetscCall(PetscFree2(sstartsj, rstartsj)); 5884 PetscCall(PetscFree(bufa_ptr)); 5885 } else { 5886 *startsj_s = sstartsj; 5887 *startsj_r = rstartsj; 5888 *bufa_ptr = bufa; 5889 } 5890 } else if (scall == MAT_REUSE_MATRIX) { 5891 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5892 } 5893 5894 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5895 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5896 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5897 PetscFunctionReturn(0); 5898 } 5899 5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5903 #if defined(PETSC_HAVE_MKL_SPARSE) 5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5905 #endif 5906 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5908 #if defined(PETSC_HAVE_ELEMENTAL) 5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5910 #endif 5911 #if defined(PETSC_HAVE_SCALAPACK) 5912 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5913 #endif 5914 #if defined(PETSC_HAVE_HYPRE) 5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5916 #endif 5917 #if defined(PETSC_HAVE_CUDA) 5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat 
*); 5919 #endif 5920 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 5922 #endif 5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 5924 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 5925 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5926 5927 /* 5928 Computes (B'*A')' since computing B*A directly is untenable 5929 5930 n p p 5931 [ ] [ ] [ ] 5932 m [ A ] * n [ B ] = m [ C ] 5933 [ ] [ ] [ ] 5934 5935 */ 5936 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) { 5937 Mat At, Bt, Ct; 5938 5939 PetscFunctionBegin; 5940 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 5941 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 5942 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 5943 PetscCall(MatDestroy(&At)); 5944 PetscCall(MatDestroy(&Bt)); 5945 PetscCall(MatTransposeSetPrecursor(Ct, C)); 5946 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 5947 PetscCall(MatDestroy(&Ct)); 5948 PetscFunctionReturn(0); 5949 } 5950 5951 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) { 5952 PetscBool cisdense; 5953 5954 PetscFunctionBegin; 5955 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 5956 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 5957 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 5958 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 5959 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 5960 PetscCall(MatSetUp(C)); 5961 5962 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5963 PetscFunctionReturn(0); 5964 } 5965 5966 /* 
----------------------------------------------------------------*/ 5967 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) { 5968 Mat_Product *product = C->product; 5969 Mat A = product->A, B = product->B; 5970 5971 PetscFunctionBegin; 5972 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5973 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5974 5975 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5976 C->ops->productsymbolic = MatProductSymbolic_AB; 5977 PetscFunctionReturn(0); 5978 } 5979 5980 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) { 5981 Mat_Product *product = C->product; 5982 5983 PetscFunctionBegin; 5984 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5985 PetscFunctionReturn(0); 5986 } 5987 5988 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5989 5990 Input Parameters: 5991 5992 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5993 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5994 5995 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5996 5997 For Set1, j1[] contains column indices of the nonzeros. 5998 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 5999 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6000 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6001 6002 Similar for Set2. 6003 6004 This routine merges the two sets of nonzeros row by row and removes repeats. 

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) {
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros of Set1, Set2 and the merged respectively */
  i[0]        = 0; /* CSR of the merged matrix starts at 0 */
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-cursor merge over the sorted (with repeats) column indices of row r.
       The cursors advance by the repeat count jmapX[tX+1]-jmapX[tX] so that each unique
       nonzero is emitted exactly once into j[]. */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Set1-only nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Set2-only nonzero */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* row r of the merged CSR ends at t unique nonzeros */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

  Atot: number of entries belonging to the diagonal block
  Annz: number of unique nonzeros belonging to the diagonal block.

  Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

  Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) {
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      /* NOTE(review): valid global columns are 0..N-1, so the bound below arguably should be
         j[p] < mat->cmap->N rather than <= -- confirm against upstream */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do { p++; } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* move on to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters are reused as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q = repeat count of this unique entry */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}

/* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) {
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz; /* p loops over jmap_new[] backwards */
  /* Walk imap[] from the back, replicating each cumulative count jmap[k+1] into every
     merged-matrix slot that lies at or after the set's k-th nonzero */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0]; /* slots before the first mapped nonzero get the base offset */
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n,
PetscInt coo_i[], PetscInt coo_j[]) { 6236 MPI_Comm comm; 6237 PetscMPIInt rank, size; 6238 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6239 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6240 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6241 6242 PetscFunctionBegin; 6243 PetscCall(PetscFree(mpiaij->garray)); 6244 PetscCall(VecDestroy(&mpiaij->lvec)); 6245 #if defined(PETSC_USE_CTABLE) 6246 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6247 #else 6248 PetscCall(PetscFree(mpiaij->colmap)); 6249 #endif 6250 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6251 mat->assembled = PETSC_FALSE; 6252 mat->was_assembled = PETSC_FALSE; 6253 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6254 6255 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6256 PetscCallMPI(MPI_Comm_size(comm, &size)); 6257 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6258 PetscCall(PetscLayoutSetUp(mat->rmap)); 6259 PetscCall(PetscLayoutSetUp(mat->cmap)); 6260 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6261 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6262 PetscCall(MatGetLocalSize(mat, &m, &n)); 6263 PetscCall(MatGetSize(mat, &M, &N)); 6264 6265 /* ---------------------------------------------------------------------------*/ 6266 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6267 /* entries come first, then local rows, then remote rows. */ 6268 /* ---------------------------------------------------------------------------*/ 6269 PetscCount n1 = coo_n, *perm1; 6270 PetscInt *i1 = coo_i, *j1 = coo_j; 6271 6272 PetscCall(PetscMalloc1(n1, &perm1)); 6273 for (k = 0; k < n1; k++) perm1[k] = k; 6274 6275 /* Manipulate indices so that entries with negative row or col indices will have smallest 6276 row indices, local entries will have greater but negative row indices, and remote entries 6277 will have positive row indices. 
6278 */ 6279 for (k = 0; k < n1; k++) { 6280 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6281 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6282 else { 6283 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6284 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6285 } 6286 } 6287 6288 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6289 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6290 for (k = 0; k < n1; k++) { 6291 if (i1[k] > PETSC_MIN_INT) break; 6292 } /* Advance k to the first entry we need to take care of */ 6293 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6294 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6295 6296 /* ---------------------------------------------------------------------------*/ 6297 /* Split local rows into diag/offdiag portions */ 6298 /* ---------------------------------------------------------------------------*/ 6299 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6300 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6301 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6302 6303 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6304 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6305 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6306 6307 /* ---------------------------------------------------------------------------*/ 6308 /* Send remote rows to their owner */ 6309 /* 
---------------------------------------------------------------------------*/ 6310 /* Find which rows should be sent to which remote ranks*/ 6311 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6312 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6313 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6314 const PetscInt *ranges; 6315 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6316 6317 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6318 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6319 for (k = rem; k < n1;) { 6320 PetscMPIInt owner; 6321 PetscInt firstRow, lastRow; 6322 6323 /* Locate a row range */ 6324 firstRow = i1[k]; /* first row of this owner */ 6325 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6326 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6327 6328 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6329 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6330 6331 /* All entries in [k,p) belong to this remote owner */ 6332 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6333 PetscMPIInt *sendto2; 6334 PetscInt *nentries2; 6335 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6336 6337 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6338 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6339 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6340 PetscCall(PetscFree2(sendto, nentries2)); 6341 sendto = sendto2; 6342 nentries = nentries2; 6343 maxNsend = maxNsend2; 6344 } 6345 sendto[nsend] = owner; 6346 nentries[nsend] = p - k; 6347 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6348 nsend++; 6349 k = p; 6350 } 6351 6352 /* Build 1st SF to know offsets on remote to send data */ 6353 PetscSF sf1; 6354 PetscInt nroots = 1, nroots2 = 0; 6355 PetscInt nleaves = nsend, nleaves2 = 0; 6356 PetscInt *offsets; 6357 PetscSFNode *iremote; 6358 6359 PetscCall(PetscSFCreate(comm, &sf1)); 6360 PetscCall(PetscMalloc1(nsend, &iremote)); 6361 PetscCall(PetscMalloc1(nsend, &offsets)); 6362 for (k = 0; k < nsend; k++) { 6363 iremote[k].rank = sendto[k]; 6364 iremote[k].index = 0; 6365 nleaves2 += nentries[k]; 6366 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6367 } 6368 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6369 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6370 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6371 PetscCall(PetscSFDestroy(&sf1)); 6372 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6373 6374 /* Build 2nd SF to send remote COOs to their owner */ 6375 PetscSF sf2; 6376 nroots = nroots2; 6377 nleaves = nleaves2; 6378 PetscCall(PetscSFCreate(comm, &sf2)); 6379 
PetscCall(PetscSFSetFromOptions(sf2)); 6380 PetscCall(PetscMalloc1(nleaves, &iremote)); 6381 p = 0; 6382 for (k = 0; k < nsend; k++) { 6383 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6384 for (q = 0; q < nentries[k]; q++, p++) { 6385 iremote[p].rank = sendto[k]; 6386 iremote[p].index = offsets[k] + q; 6387 } 6388 } 6389 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6390 6391 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6392 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6393 6394 /* Send the remote COOs to their owner */ 6395 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6396 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6397 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6398 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6399 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6400 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6401 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6402 6403 PetscCall(PetscFree(offsets)); 6404 PetscCall(PetscFree2(sendto, nentries)); 6405 6406 /* ---------------------------------------------------------------*/ 6407 /* Sort received COOs by row along with the permutation array */ 6408 /* ---------------------------------------------------------------*/ 6409 for (k = 0; k < n2; k++) perm2[k] = k; 6410 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6411 6412 /* ---------------------------------------------------------------*/ 6413 /* 
Split received COOs into diag/offdiag portions */ 6414 /* ---------------------------------------------------------------*/ 6415 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6416 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6417 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6418 6419 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6420 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6421 6422 /* --------------------------------------------------------------------------*/ 6423 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6424 /* --------------------------------------------------------------------------*/ 6425 PetscInt *Ai, *Bi; 6426 PetscInt *Aj, *Bj; 6427 6428 PetscCall(PetscMalloc1(m + 1, &Ai)); 6429 PetscCall(PetscMalloc1(m + 1, &Bi)); 6430 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6431 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6432 6433 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6434 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6435 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6436 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6437 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6438 6439 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6440 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6441 6442 /* --------------------------------------------------------------------------*/ 6443 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6444 /* expect nonzeros in A/B most likely have local contributing entries */ 6445 /* --------------------------------------------------------------------------*/ 6446 PetscInt Annz = Ai[m]; 6447 PetscInt Bnnz = Bi[m]; 6448 
PetscCount *Ajmap1_new, *Bjmap1_new; 6449 6450 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6451 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6452 6453 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6454 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6455 6456 PetscCall(PetscFree(Aimap1)); 6457 PetscCall(PetscFree(Ajmap1)); 6458 PetscCall(PetscFree(Bimap1)); 6459 PetscCall(PetscFree(Bjmap1)); 6460 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6461 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6462 PetscCall(PetscFree(perm1)); 6463 PetscCall(PetscFree3(i2, j2, perm2)); 6464 6465 Ajmap1 = Ajmap1_new; 6466 Bjmap1 = Bjmap1_new; 6467 6468 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6469 if (Annz < Annz1 + Annz2) { 6470 PetscInt *Aj_new; 6471 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6472 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6473 PetscCall(PetscFree(Aj)); 6474 Aj = Aj_new; 6475 } 6476 6477 if (Bnnz < Bnnz1 + Bnnz2) { 6478 PetscInt *Bj_new; 6479 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6480 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6481 PetscCall(PetscFree(Bj)); 6482 Bj = Bj_new; 6483 } 6484 6485 /* --------------------------------------------------------------------------------*/ 6486 /* Create new submatrices for on-process and off-process coupling */ 6487 /* --------------------------------------------------------------------------------*/ 6488 PetscScalar *Aa, *Ba; 6489 MatType rtype; 6490 Mat_SeqAIJ *a, *b; 6491 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6492 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6493 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6494 if (cstart) { 6495 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6496 } 6497 PetscCall(MatDestroy(&mpiaij->A)); 6498 PetscCall(MatDestroy(&mpiaij->B)); 6499 PetscCall(MatGetRootType_Private(mat, &rtype)); 6500 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6501 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6502 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6503 6504 a = (Mat_SeqAIJ *)mpiaij->A->data; 6505 b = (Mat_SeqAIJ *)mpiaij->B->data; 6506 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6507 a->free_a = b->free_a = PETSC_TRUE; 6508 a->free_ij = b->free_ij = PETSC_TRUE; 6509 6510 /* conversion must happen AFTER multiply setup */ 6511 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6512 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6513 PetscCall(VecDestroy(&mpiaij->lvec)); 6514 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6515 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)mpiaij->lvec)); 6516 6517 mpiaij->coo_n = coo_n; 6518 mpiaij->coo_sf = sf2; 6519 mpiaij->sendlen = nleaves; 6520 mpiaij->recvlen = nroots; 6521 6522 mpiaij->Annz = Annz; 6523 mpiaij->Bnnz = Bnnz; 6524 6525 mpiaij->Annz2 = Annz2; 6526 mpiaij->Bnnz2 = Bnnz2; 6527 6528 mpiaij->Atot1 = Atot1; 6529 mpiaij->Atot2 = Atot2; 6530 mpiaij->Btot1 = Btot1; 6531 mpiaij->Btot2 = Btot2; 6532 6533 mpiaij->Ajmap1 = Ajmap1; 6534 mpiaij->Aperm1 = Aperm1; 6535 6536 mpiaij->Bjmap1 = Bjmap1; 6537 mpiaij->Bperm1 = Bperm1; 6538 6539 mpiaij->Aimap2 = Aimap2; 6540 mpiaij->Ajmap2 = Ajmap2; 6541 mpiaij->Aperm2 = Aperm2; 6542 6543 mpiaij->Bimap2 = Bimap2; 6544 mpiaij->Bjmap2 = Bjmap2; 6545 mpiaij->Bperm2 = Bperm2; 6546 6547 mpiaij->Cperm1 = Cperm1; 6548 6549 /* Allocate in preallocation. 
If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* MatSetValuesCOO_MPIAIJ - insert or add the scalars v[] into the matrix, where v[] is
   indexed through the perm/jmap/imap arrays that MatSetPreallocationCOO_MPIAIJ() built and
   cached in Mat_MPIAIJ.

   v     - one scalar per COO entry of the preallocation call (accessed via the perm arrays)
   imode - INSERT_VALUES: each nonzero is reset to 0 before contributions are summed in;
           any other mode adds the sums to the current values

   Off-process contributions are packed through Cperm1 and shipped over coo_sf; that
   reduction is overlapped with the local accumulation into the diag (A) and offdiag (B)
   blocks. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) {
  Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat          A = mpiaij->A, B = mpiaij->B; /* diag and offdiag blocks */
  PetscCount   Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar *Aa, *Ba;
  PetscScalar *sendbuf = mpiaij->sendbuf;
  PetscScalar *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B: Ajmap1[i]..Ajmap1[i+1] delimits the local duplicates
     contributing to nonzero i */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stablility */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i]/Bimap2[i] map the i-th received
     unique entry to its nonzero slot in Aa/Ba */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
    `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* MatCreate_MPIAIJ - type constructor run by MatSetType(B, MATMPIAIJ): installs the MPIAIJ
   ops table, initializes the Mat_MPIAIJ fields to an empty state, and registers by name the
   composed methods (preallocation, store/retrieve, conversions, matrix products, COO
   assembly) that the rest of PETSc resolves via PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNewLog(B, &b));
  B->data       = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* composed methods, looked up by string name at their call sites */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* conversions to sibling formats; device/third-party ones only when compiled in */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.
j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
       communication if it is known that only local entries will be set.

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) {
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* the caller's CSR arrays are adopted directly, so no preallocation phase is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* diag block is m x n with local column ids j; offdiag block is m x cmap->N with global oj */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* only local rows are set here, so assembly can skip all off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Scratch data shared by the MPIAIJBACKEND product implementation below */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
*/
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Free all scratch held by a MatMatMPIAIJBACKEND (installed as the product-data destructor).
   Only own[0]/off[0] are freed besides the pointer tables themselves; per the struct comment
   the own[i]/off[i] entries point into those base allocations. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) {
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were obtained through the SF with memory type mtype, so release them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) {
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* a subtype may compose a specialized copy routine; prefer it when present */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* gather: v[k] = values[idx[k]] */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* no index set: contiguous copy of the first n values (callers pass n = nz here) */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend product: re-run the numeric op of each intermediate product,
   gather their values into the COO buffers (coo_v on-process, coo_w off-process), gather the
   off-process part through the SF when needed, then fold everything into C via
   MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) {
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* the symbolic-phase temporaries are only current for the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o are running offsets into coo_v/coo_w as each product's values are appended */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products are not copied into C */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy all of its nonzeros */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) {
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 6914 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6915 /* a base offset; type-2: sparse with a local to global map table */ 6916 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6917 6918 MatProductType ptype; 6919 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk; 6920 PetscMPIInt size; 6921 6922 PetscFunctionBegin; 6923 MatCheckProduct(C, 1); 6924 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 6925 ptype = product->type; 6926 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 6927 ptype = MATPRODUCT_AB; 6928 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6929 } 6930 switch (ptype) { 6931 case MATPRODUCT_AB: 6932 A = product->A; 6933 P = product->B; 6934 m = A->rmap->n; 6935 n = P->cmap->n; 6936 M = A->rmap->N; 6937 N = P->cmap->N; 6938 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6939 break; 6940 case MATPRODUCT_AtB: 6941 P = product->A; 6942 A = product->B; 6943 m = P->cmap->n; 6944 n = A->cmap->n; 6945 M = P->cmap->N; 6946 N = A->cmap->N; 6947 hasoffproc = PETSC_TRUE; 6948 break; 6949 case MATPRODUCT_PtAP: 6950 A = product->A; 6951 P = product->B; 6952 m = P->cmap->n; 6953 n = P->cmap->n; 6954 M = P->cmap->N; 6955 N = P->cmap->N; 6956 hasoffproc = PETSC_TRUE; 6957 break; 6958 default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 6959 } 6960 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 6961 if (size == 1) hasoffproc = PETSC_FALSE; 6962 6963 /* defaults */ 6964 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 6965 mp[i] = NULL; 6966 mptmp[i] = PETSC_FALSE; 6967 rmapt[i] = -1; 6968 cmapt[i] = -1; 6969 rmapa[i] = NULL; 6970 cmapa[i] = NULL; 6971 } 6972 6973 /* customization */ 6974 
PetscCall(PetscNew(&mmdata)); 6975 mmdata->reusesym = product->api_user; 6976 if (ptype == MATPRODUCT_AB) { 6977 if (product->api_user) { 6978 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 6979 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 6980 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6981 PetscOptionsEnd(); 6982 } else { 6983 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 6984 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 6985 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6986 PetscOptionsEnd(); 6987 } 6988 } else if (ptype == MATPRODUCT_PtAP) { 6989 if (product->api_user) { 6990 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 6991 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6992 PetscOptionsEnd(); 6993 } else { 6994 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 6995 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6996 PetscOptionsEnd(); 6997 } 6998 } 6999 a = (Mat_MPIAIJ *)A->data; 7000 p = (Mat_MPIAIJ *)P->data; 7001 PetscCall(MatSetSizes(C, m, n, M, N)); 7002 PetscCall(PetscLayoutSetUp(C->rmap)); 7003 PetscCall(PetscLayoutSetUp(C->cmap)); 7004 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7005 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7006 7007 cp = 0; 7008 switch (ptype) { 7009 case MATPRODUCT_AB: /* A * P */ 7010 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7011 7012 /* A_diag * P_local (merged or not) */ 7013 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7014 /* P is product->B */ 7015 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7016 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7017 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7018 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7019 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7020 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7021 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7022 mp[cp]->product->api_user = product->api_user; 7023 PetscCall(MatProductSetFromOptions(mp[cp])); 7024 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7025 PetscCall(ISGetIndices(glob, &globidx)); 7026 rmapt[cp] = 1; 7027 cmapt[cp] = 2; 7028 cmapa[cp] = globidx; 7029 mptmp[cp] = PETSC_FALSE; 7030 cp++; 7031 } else { /* A_diag * P_diag and A_diag * P_off */ 7032 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7033 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7034 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7035 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7036 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7037 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7038 mp[cp]->product->api_user = product->api_user; 7039 PetscCall(MatProductSetFromOptions(mp[cp])); 7040 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7041 rmapt[cp] = 1; 7042 cmapt[cp] = 1; 7043 mptmp[cp] = PETSC_FALSE; 7044 cp++; 7045 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7046 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7047 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7048 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7049 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7050 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7051 mp[cp]->product->api_user = product->api_user; 7052 PetscCall(MatProductSetFromOptions(mp[cp])); 7053 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7054 rmapt[cp] = 1; 7055 cmapt[cp] = 2; 7056 cmapa[cp] = p->garray; 7057 mptmp[cp] = PETSC_FALSE; 7058 cp++; 7059 } 7060 7061 /* A_off * P_other */ 7062 if (mmdata->P_oth) { 7063 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7064 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7065 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7066 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7067 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7068 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7069 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7070 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7071 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7072 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7073 mp[cp]->product->api_user = product->api_user; 7074 PetscCall(MatProductSetFromOptions(mp[cp])); 7075 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7076 rmapt[cp] = 1; 7077 cmapt[cp] = 2; 7078 cmapa[cp] = P_oth_idx; 7079 mptmp[cp] = PETSC_FALSE; 7080 cp++; 7081 } 7082 break; 7083 7084 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7085 /* A is product->B */ 7086 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7087 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7088 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7089 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7090 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7091 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7092 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7093 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7094 mp[cp]->product->api_user = product->api_user; 7095 PetscCall(MatProductSetFromOptions(mp[cp])); 7096 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7097 PetscCall(ISGetIndices(glob, &globidx)); 7098 rmapt[cp] = 2; 7099 rmapa[cp] = globidx; 7100 cmapt[cp] = 2; 7101 cmapa[cp] = globidx; 7102 mptmp[cp] = PETSC_FALSE; 7103 cp++; 7104 } else { 7105 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7106 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7107 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7108 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7109 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7110 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7111 mp[cp]->product->api_user = product->api_user; 7112 PetscCall(MatProductSetFromOptions(mp[cp])); 7113 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7114 PetscCall(ISGetIndices(glob, &globidx)); 7115 rmapt[cp] = 1; 7116 cmapt[cp] = 2; 7117 cmapa[cp] = globidx; 7118 mptmp[cp] = PETSC_FALSE; 7119 cp++; 7120 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7121 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7122 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7123 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7124 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7125 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7126 mp[cp]->product->api_user = product->api_user; 7127 PetscCall(MatProductSetFromOptions(mp[cp])); 7128 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7129 rmapt[cp] = 2; 7130 rmapa[cp] = p->garray; 
7131 cmapt[cp] = 2; 7132 cmapa[cp] = globidx; 7133 mptmp[cp] = PETSC_FALSE; 7134 cp++; 7135 } 7136 break; 7137 case MATPRODUCT_PtAP: 7138 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7139 /* P is product->B */ 7140 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7141 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7142 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7143 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7144 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7145 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7146 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7147 mp[cp]->product->api_user = product->api_user; 7148 PetscCall(MatProductSetFromOptions(mp[cp])); 7149 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7150 PetscCall(ISGetIndices(glob, &globidx)); 7151 rmapt[cp] = 2; 7152 rmapa[cp] = globidx; 7153 cmapt[cp] = 2; 7154 cmapa[cp] = globidx; 7155 mptmp[cp] = PETSC_FALSE; 7156 cp++; 7157 if (mmdata->P_oth) { 7158 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7159 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7160 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7161 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7162 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7163 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7164 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7165 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7166 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7167 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7168 mp[cp]->product->api_user = product->api_user; 7169 PetscCall(MatProductSetFromOptions(mp[cp])); 7170 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7171 
mptmp[cp] = PETSC_TRUE; 7172 cp++; 7173 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7174 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7175 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7176 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7177 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7178 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7179 mp[cp]->product->api_user = product->api_user; 7180 PetscCall(MatProductSetFromOptions(mp[cp])); 7181 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7182 rmapt[cp] = 2; 7183 rmapa[cp] = globidx; 7184 cmapt[cp] = 2; 7185 cmapa[cp] = P_oth_idx; 7186 mptmp[cp] = PETSC_FALSE; 7187 cp++; 7188 } 7189 break; 7190 default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7191 } 7192 /* sanity check */ 7193 if (size > 1) 7194 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7195 7196 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7197 for (i = 0; i < cp; i++) { 7198 mmdata->mp[i] = mp[i]; 7199 mmdata->mptmp[i] = mptmp[i]; 7200 } 7201 mmdata->cp = cp; 7202 C->product->data = mmdata; 7203 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7204 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7205 7206 /* memory type */ 7207 mmdata->mtype = PETSC_MEMTYPE_HOST; 7208 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7209 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7210 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7211 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7212 7213 /* prepare coo coordinates for values insertion */ 7214 7215 /* count total nonzeros of those intermediate seqaij Mats 7216 ncoo_d: # of nonzeros of matrices that do 
not have offproc entries 7217 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7218 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7219 */ 7220 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7221 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7222 if (mptmp[cp]) continue; 7223 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7224 const PetscInt *rmap = rmapa[cp]; 7225 const PetscInt mr = mp[cp]->rmap->n; 7226 const PetscInt rs = C->rmap->rstart; 7227 const PetscInt re = C->rmap->rend; 7228 const PetscInt *ii = mm->i; 7229 for (i = 0; i < mr; i++) { 7230 const PetscInt gr = rmap[i]; 7231 const PetscInt nz = ii[i + 1] - ii[i]; 7232 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7233 else ncoo_oown += nz; /* this row is local */ 7234 } 7235 } else ncoo_d += mm->nz; 7236 } 7237 7238 /* 7239 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7240 7241 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7242 7243 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7244 7245 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7246 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7247 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7248 7249 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7250 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7251 */ 7252 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7253 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7254 7255 /* gather (i,j) of nonzeros inserted by remote procs */ 7256 if (hasoffproc) { 7257 PetscSF msf; 7258 PetscInt ncoo2, *coo_i2, *coo_j2; 7259 7260 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7261 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7262 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7263 7264 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7265 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7266 PetscInt *idxoff = mmdata->off[cp]; 7267 PetscInt *idxown = mmdata->own[cp]; 7268 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7269 const PetscInt *rmap = rmapa[cp]; 7270 const PetscInt *cmap = cmapa[cp]; 7271 const PetscInt *ii = mm->i; 7272 PetscInt *coi = coo_i + ncoo_o; 7273 PetscInt *coj = coo_j + ncoo_o; 7274 const PetscInt mr = mp[cp]->rmap->n; 7275 const PetscInt rs = C->rmap->rstart; 7276 const PetscInt re = C->rmap->rend; 7277 const PetscInt cs = C->cmap->rstart; 7278 for (i = 0; i < mr; i++) { 7279 const PetscInt *jj = mm->j + ii[i]; 7280 const PetscInt gr = rmap[i]; 7281 const PetscInt nz = ii[i + 1] - ii[i]; 7282 if (gr < rs || gr >= re) { /* this is an offproc row */ 7283 for (j = ii[i]; j < ii[i + 1]; j++) { 7284 *coi++ = gr; 7285 *idxoff++ = j; 7286 } 7287 if (!cmapt[cp]) { /* already global */ 7288 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7289 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7290 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7291 } else { /* offdiag */ 7292 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7293 } 7294 ncoo_o += nz; 7295 } else { /* this is a local row */ 7296 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7297 } 7298 } 7299 } 7300 mmdata->off[cp + 1] = idxoff; 7301 mmdata->own[cp + 1] = idxown; 7302 } 7303 7304 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7305 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7306 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7307 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7308 ncoo = ncoo_d + ncoo_oown + ncoo2; 7309 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7310 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7311 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7312 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7313 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7314 PetscCall(PetscFree2(coo_i, coo_j)); 7315 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7316 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7317 coo_i = coo_i2; 7318 coo_j = coo_j2; 7319 } else { /* no offproc values insertion */ 7320 ncoo = ncoo_d; 7321 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7322 7323 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7324 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7325 PetscCall(PetscSFSetUp(mmdata->sf)); 7326 } 7327 mmdata->hasoffproc = hasoffproc; 7328 7329 /* gather (i,j) of nonzeros inserted locally */ 7330 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7331 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7332 PetscInt *coi = coo_i + ncoo_d; 7333 PetscInt *coj = coo_j + ncoo_d; 7334 const PetscInt *jj = mm->j; 7335 const PetscInt *ii = mm->i; 7336 const PetscInt *cmap = cmapa[cp]; 7337 const PetscInt *rmap = rmapa[cp]; 7338 const PetscInt mr = mp[cp]->rmap->n; 7339 const PetscInt rs = C->rmap->rstart; 7340 const 
PetscInt re = C->rmap->rend; 7341 const PetscInt cs = C->cmap->rstart; 7342 7343 if (mptmp[cp]) continue; 7344 if (rmapt[cp] == 1) { /* consecutive rows */ 7345 /* fill coo_i */ 7346 for (i = 0; i < mr; i++) { 7347 const PetscInt gr = i + rs; 7348 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7349 } 7350 /* fill coo_j */ 7351 if (!cmapt[cp]) { /* type-0, already global */ 7352 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7353 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7354 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7355 } else { /* type-2, local to global for sparse columns */ 7356 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7357 } 7358 ncoo_d += mm->nz; 7359 } else if (rmapt[cp] == 2) { /* sparse rows */ 7360 for (i = 0; i < mr; i++) { 7361 const PetscInt *jj = mm->j + ii[i]; 7362 const PetscInt gr = rmap[i]; 7363 const PetscInt nz = ii[i + 1] - ii[i]; 7364 if (gr >= rs && gr < re) { /* local rows */ 7365 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7366 if (!cmapt[cp]) { /* type-0, already global */ 7367 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7368 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7369 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7370 } else { /* type-2, local to global for sparse columns */ 7371 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7372 } 7373 ncoo_d += nz; 7374 } 7375 } 7376 } 7377 } 7378 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7379 PetscCall(ISDestroy(&glob)); 7380 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7381 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7382 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7383 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7384 7385 /* preallocate with COO data */ 7386 PetscCall(MatSetPreallocationCOO(C, ncoo, 
coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}

/*
  MatProductSetFromOptions_MPIAIJBACKEND - select the backend symbolic kernel
  (MatProductSymbolic_MPIAIJBACKEND) for AB, AtB and PtAP products when possible.

  With device support, the backend is chosen only when the two operands have the
  same type and neither is bound to the CPU; the user may still force the CPU path
  with the -mat*_backend_cpu options queried below. If the backend is not selected,
  dispatch falls back to the plain MPIAIJ product selection.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) {
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* operands share a type and are not CPU-bound */
  PetscBool usecpu = PETSC_FALSE; /* user explicitly requested the CPU implementation */
#else
  PetscBool match = PETSC_TRUE; /* no device support: backend kernels are always acceptable */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only consider the backend when both operands live off the CPU and have identical types */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* option names depend on whether the user called the MatMatMult()-style API (api_user) or the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default: break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP: mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; break;
    default: break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)

  NOTE(review): the compaction only compares each index against the last stored block
  index, so it assumes MatGetRow() returns column indices sorted ascending - TODO confirm.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) {
  PetscInt        cnt = -1, nidx, j; /* cnt starts at -1 so an empty row yields *n = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; /* entered a new block column */
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed - the block indices (must be large enough to contain the indices)

  NOTE(review): w0/w1/w2 are caller-provided workspaces used in ping-pong fashion, so
  *collapsed appears to alias one of them rather than fresh storage - verify against
  PetscMergeIntArray()'s allocation behavior before freeing anything here.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) {
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* collapse the first row of the block, then merge in the remaining bs-1 rows */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap "previous result" and "merge output" buffers for the next iteration */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameter:
   . Amat - matrix
   - symmetrize - make the result symmetric
   + scale - scale with diagonal

   Output Parameter:
   .
a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) {
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c; /* a = diagonal block, b = off-diagonal block (NULL when sequential), c = loop cursor over {a,b} */
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows = rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* blocked matrix: collapse each bs x bs block to a single scalar entry (sum of |Re(.)| over the block) */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* fast path: assumes every block is fully dense; falls through to old_bs if a non-dense block is found */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, AA[4096]; /* fixed-size per-block-row buffers; guarded by the PetscCheck below */
      PetscInt  *aj, *ai, AJ[4096], nc;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* count block nonzeros per block row and verify the dense-block assumption */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0; /* row length not a multiple of bs: blocks cannot all be dense */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* rows of a block must start at the same column */
            if (nnz[brow / bs] != jj / bs) ok = 0;                      /* and have the same block count */
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            /* dense-block assumption violated: free counts and take the general (slower) path */
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs;
          }
        }
        PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {          // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first pass: compute global block column indices and zero the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs; /* garray maps local off-diag column to global column */
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second pass: accumulate |Re(.)| over each block */
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    } else {
      /* general path: no assumption on block structure; collapse rows via MatCollapseRows */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; /* clamp to number of off-process block columns */
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* ADD_VALUES accumulates all scalar entries of a block into one graph entry */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
       procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a = d->A;
      b = d->B;
    }
    /* abs */
    for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
      MatInfo      info;
      PetscScalar *avals;
      PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
      PetscCall(MatSeqAIJGetArray(c, &avals));
      /* NOTE(review): info.nz_used is a PetscLogDouble compared against an int index - confirm intended */
      for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(c, &avals));
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      /* G <- G + G^T so the graph is symmetric even if Amat is not */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag)); /* symmetric diagonal scaling: D^{-1/2} G D^{-1/2} */
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/*
  MatFilter_AIJ - produce filteredG, a copy of the scalar graph Gmat keeping only the
  entries whose real part exceeds vfilter. The result has the same distribution and
  square global size MM x MM; symmetry options of Gmat are propagated to the result.
*/
PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) {
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; /* nnz0/nnz1: nonzeros before/after filtering (for diagnostics) */
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c; /* a = diagonal block, b = off-diagonal block (NULL when sequential), c = loop cursor */
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this
     operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a      = d->A;
    b      = d->B;
    garray = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols;
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); /* every kept entry belongs to a locally owned row */
  PetscCall(PetscFree2(d_nnz, o_nnz));
  /* scratch buffers sized by the longest local row */
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0;
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart;     //diag: local column -> global column
          if (c != a) cid = garray[idx[jj]];   /* off-diag: map through garray instead */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...)
\ 7860 do { \ 7861 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 7862 return; \ 7863 } while (0) 7864 7865 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7866 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7867 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7868 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7869 #else 7870 #endif 7871 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) { 7872 Mat mat = *mmat; 7873 PetscInt m = *mm, n = *mn; 7874 InsertMode addv = *maddv; 7875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 7876 PetscScalar value; 7877 7878 MatCheckPreallocated(mat, 1); 7879 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7880 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 7881 { 7882 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 7883 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 7884 PetscBool roworiented = aij->roworiented; 7885 7886 /* Some Variables required in the macro */ 7887 Mat A = aij->A; 7888 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 7889 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 7890 MatScalar *aa; 7891 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7892 Mat B = aij->B; 7893 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 7894 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 7895 MatScalar *ba; 7896 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7897 * cannot use "#if defined" inside a macro. 
*/ 7898 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7899 7900 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 7901 PetscInt nonew = a->nonew; 7902 MatScalar *ap1, *ap2; 7903 7904 PetscFunctionBegin; 7905 PetscCall(MatSeqAIJGetArray(A, &aa)); 7906 PetscCall(MatSeqAIJGetArray(B, &ba)); 7907 for (i = 0; i < m; i++) { 7908 if (im[i] < 0) continue; 7909 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 7910 if (im[i] >= rstart && im[i] < rend) { 7911 row = im[i] - rstart; 7912 lastcol1 = -1; 7913 rp1 = aj + ai[row]; 7914 ap1 = aa + ai[row]; 7915 rmax1 = aimax[row]; 7916 nrow1 = ailen[row]; 7917 low1 = 0; 7918 high1 = nrow1; 7919 lastcol2 = -1; 7920 rp2 = bj + bi[row]; 7921 ap2 = ba + bi[row]; 7922 rmax2 = bimax[row]; 7923 nrow2 = bilen[row]; 7924 low2 = 0; 7925 high2 = nrow2; 7926 7927 for (j = 0; j < n; j++) { 7928 if (roworiented) value = v[i * n + j]; 7929 else value = v[i + j * m]; 7930 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7931 if (in[j] >= cstart && in[j] < cend) { 7932 col = in[j] - cstart; 7933 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 7934 } else if (in[j] < 0) continue; 7935 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7936 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7937 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 7938 } else { 7939 if (mat->was_assembled) { 7940 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7941 #if defined(PETSC_USE_CTABLE) 7942 PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); 7943 col--; 7944 #else 7945 col = aij->colmap[in[j]] - 1; 7946 #endif 7947 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) 
{ 7948 PetscCall(MatDisAssemble_MPIAIJ(mat)); 7949 col = in[j]; 7950 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7951 B = aij->B; 7952 b = (Mat_SeqAIJ *)B->data; 7953 bimax = b->imax; 7954 bi = b->i; 7955 bilen = b->ilen; 7956 bj = b->j; 7957 rp2 = bj + bi[row]; 7958 ap2 = ba + bi[row]; 7959 rmax2 = bimax[row]; 7960 nrow2 = bilen[row]; 7961 low2 = 0; 7962 high2 = nrow2; 7963 bm = aij->B->rmap->n; 7964 ba = b->a; 7965 inserted = PETSC_FALSE; 7966 } 7967 } else col = in[j]; 7968 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 7969 } 7970 } 7971 } else if (!aij->donotstash) { 7972 if (roworiented) { 7973 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7974 } else { 7975 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7976 } 7977 } 7978 } 7979 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 7980 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 7981 } 7982 PetscFunctionReturnVoid(); 7983 } 7984 7985 /* Undefining these here since they were redefined from their original definition above! No 7986 * other PETSc functions should be defined past this point, as it is impossible to recover the 7987 * original definitions */ 7988 #undef PetscCall 7989 #undef SETERRQ 7990