1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 15 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 16 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 17 PetscCall(MatDestroy(&B)); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 22 { 23 Mat B; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 27 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 28 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 29 PetscFunctionReturn(0); 30 } 31 32 /*MC 33 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 34 35 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 36 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 37 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 38 for communicators controlling multiple processes. It is recommended that you call both of 39 the above preallocation routines for simplicity. 40 41 Options Database Keys: 42 . 
-mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 43 44 Developer Note: 45 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 46 enough exist. 47 48 Level: beginner 49 50 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 51 M*/ 52 53 /*MC 54 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 55 56 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 57 and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 58 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 59 for communicators controlling multiple processes. It is recommended that you call both of 60 the above preallocation routines for simplicity. 61 62 Options Database Keys: 63 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 64 65 Level: beginner 66 67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 68 M*/ 69 70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 71 { 72 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 73 74 PetscFunctionBegin; 75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 76 A->boundtocpu = flg; 77 #endif 78 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 79 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 80 81 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 82 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 83 * to differ from the parent matrix. 
*/ 84 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 85 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 86 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 91 { 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 93 94 PetscFunctionBegin; 95 if (mat->A) { 96 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 97 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 103 { 104 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 105 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 106 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 107 const PetscInt *ia, *ib; 108 const MatScalar *aa, *bb, *aav, *bav; 109 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 110 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 111 112 PetscFunctionBegin; 113 *keptrows = NULL; 114 115 ia = a->i; 116 ib = b->i; 117 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 118 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 119 for (i = 0; i < m; i++) { 120 na = ia[i + 1] - ia[i]; 121 nb = ib[i + 1] - ib[i]; 122 if (!na && !nb) { 123 cnt++; 124 goto ok1; 125 } 126 aa = aav + ia[i]; 127 for (j = 0; j < na; j++) { 128 if (aa[j] != 0.0) goto ok1; 129 } 130 bb = bav + ib[i]; 131 for (j = 0; j < nb; j++) { 132 if (bb[j] != 0.0) goto ok1; 133 } 134 cnt++; 135 ok1:; 136 } 137 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 138 if (!n0rows) { 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 140 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 141 PetscFunctionReturn(0); 142 } 143 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 144 cnt = 0; 145 for (i = 0; i < m; i++) { 146 na = ia[i + 1] - ia[i]; 147 nb = ib[i + 1] - ib[i]; 148 if (!na && !nb) continue; 149 aa = aav + ia[i]; 150 for (j = 0; j < na; j++) { 151 if (aa[j] != 0.0) { 152 rows[cnt++] = rstart + i; 153 goto ok2; 154 } 155 } 156 bb = bav + ib[i]; 157 for 
(j = 0; j < nb; j++) { 158 if (bb[j] != 0.0) { 159 rows[cnt++] = rstart + i; 160 goto ok2; 161 } 162 } 163 ok2:; 164 } 165 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 167 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 174 PetscBool cong; 175 176 PetscFunctionBegin; 177 PetscCall(MatHasCongruentLayouts(Y, &cong)); 178 if (Y->assembled && cong) { 179 PetscCall(MatDiagonalSet(aij->A, D, is)); 180 } else { 181 PetscCall(MatDiagonalSet_Default(Y, D, is)); 182 } 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 189 PetscInt i, rstart, nrows, *rows; 190 191 PetscFunctionBegin; 192 *zrows = NULL; 193 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 194 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 195 for (i = 0; i < nrows; i++) rows[i] += rstart; 196 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 201 { 202 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 203 PetscInt i, m, n, *garray = aij->garray; 204 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 205 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 206 PetscReal *work; 207 const PetscScalar *dummy; 208 209 PetscFunctionBegin; 210 PetscCall(MatGetSize(A, &m, &n)); 211 PetscCall(PetscCalloc1(n, &work)); 212 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 213 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 214 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 215 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 216 if (type == NORM_2) { 
217 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 218 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 219 } else if (type == NORM_1) { 220 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 221 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 222 } else if (type == NORM_INFINITY) { 223 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 224 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 225 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 226 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 227 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 228 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 229 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 230 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 231 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 232 if (type == NORM_INFINITY) { 233 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 234 } else { 235 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 236 } 237 PetscCall(PetscFree(work)); 238 if (type == NORM_2) { 239 for (i = 0; i < n; i++) reductions[i] = 
PetscSqrtReal(reductions[i]); 240 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 241 for (i = 0; i < n; i++) reductions[i] /= m; 242 } 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 247 { 248 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 249 IS sis, gis; 250 const PetscInt *isis, *igis; 251 PetscInt n, *iis, nsis, ngis, rstart, i; 252 253 PetscFunctionBegin; 254 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 255 PetscCall(MatFindNonzeroRows(a->B, &gis)); 256 PetscCall(ISGetSize(gis, &ngis)); 257 PetscCall(ISGetSize(sis, &nsis)); 258 PetscCall(ISGetIndices(sis, &isis)); 259 PetscCall(ISGetIndices(gis, &igis)); 260 261 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 262 PetscCall(PetscArraycpy(iis, igis, ngis)); 263 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 264 n = ngis + nsis; 265 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 266 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 267 for (i = 0; i < n; i++) iis[i] += rstart; 268 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 269 270 PetscCall(ISRestoreIndices(sis, &isis)); 271 PetscCall(ISRestoreIndices(gis, &igis)); 272 PetscCall(ISDestroy(&sis)); 273 PetscCall(ISDestroy(&gis)); 274 PetscFunctionReturn(0); 275 } 276 277 /* 278 Local utility routine that creates a mapping from the global column 279 number to the local number in the off-diagonal part of the local 280 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 281 a slightly higher hash table cost; without it it is not scalable (each processor 282 has an order N integer array but is fast to access. 
283 */ 284 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 287 PetscInt n = aij->B->cmap->n, i; 288 289 PetscFunctionBegin; 290 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 291 #if defined(PETSC_USE_CTABLE) 292 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 293 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 294 #else 295 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 296 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 297 #endif 298 PetscFunctionReturn(0); 299 } 300 301 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 302 { \ 303 if (col <= lastcol1) low1 = 0; \ 304 else high1 = nrow1; \ 305 lastcol1 = col; \ 306 while (high1 - low1 > 5) { \ 307 t = (low1 + high1) / 2; \ 308 if (rp1[t] > col) high1 = t; \ 309 else low1 = t; \ 310 } \ 311 for (_i = low1; _i < high1; _i++) { \ 312 if (rp1[_i] > col) break; \ 313 if (rp1[_i] == col) { \ 314 if (addv == ADD_VALUES) { \ 315 ap1[_i] += value; \ 316 /* Not sure LogFlops will slow dow the code or not */ \ 317 (void)PetscLogFlops(1.0); \ 318 } else ap1[_i] = value; \ 319 goto a_noinsert; \ 320 } \ 321 } \ 322 if (value == 0.0 && ignorezeroentries && row != col) { \ 323 low1 = 0; \ 324 high1 = nrow1; \ 325 goto a_noinsert; \ 326 } \ 327 if (nonew == 1) { \ 328 low1 = 0; \ 329 high1 = nrow1; \ 330 goto a_noinsert; \ 331 } \ 332 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 333 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 334 N = nrow1++ - 1; \ 335 a->nz++; \ 336 high1++; \ 337 /* shift up all the later entries in this row */ \ 338 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 339 
PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 340 rp1[_i] = col; \ 341 ap1[_i] = value; \ 342 A->nonzerostate++; \ 343 a_noinsert:; \ 344 ailen[row] = nrow1; \ 345 } 346 347 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 348 { \ 349 if (col <= lastcol2) low2 = 0; \ 350 else high2 = nrow2; \ 351 lastcol2 = col; \ 352 while (high2 - low2 > 5) { \ 353 t = (low2 + high2) / 2; \ 354 if (rp2[t] > col) high2 = t; \ 355 else low2 = t; \ 356 } \ 357 for (_i = low2; _i < high2; _i++) { \ 358 if (rp2[_i] > col) break; \ 359 if (rp2[_i] == col) { \ 360 if (addv == ADD_VALUES) { \ 361 ap2[_i] += value; \ 362 (void)PetscLogFlops(1.0); \ 363 } else ap2[_i] = value; \ 364 goto b_noinsert; \ 365 } \ 366 } \ 367 if (value == 0.0 && ignorezeroentries) { \ 368 low2 = 0; \ 369 high2 = nrow2; \ 370 goto b_noinsert; \ 371 } \ 372 if (nonew == 1) { \ 373 low2 = 0; \ 374 high2 = nrow2; \ 375 goto b_noinsert; \ 376 } \ 377 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 378 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 379 N = nrow2++ - 1; \ 380 b->nz++; \ 381 high2++; \ 382 /* shift up all the later entries in this row */ \ 383 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 384 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 385 rp2[_i] = col; \ 386 ap2[_i] = value; \ 387 B->nonzerostate++; \ 388 b_noinsert:; \ 389 bilen[row] = nrow2; \ 390 } 391 392 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 393 { 394 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 395 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 396 PetscInt l, *garray = mat->garray, diag; 397 PetscScalar *aa, *ba; 398 399 PetscFunctionBegin; 400 /* code only works for square matrices A */ 401 402 /* 
find size of row to the left of the diagonal part */ 403 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 404 row = row - diag; 405 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 406 if (garray[b->j[b->i[row] + l]] > diag) break; 407 } 408 if (l) { 409 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 410 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 411 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 412 } 413 414 /* diagonal part */ 415 if (a->i[row + 1] - a->i[row]) { 416 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 417 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 418 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 419 } 420 421 /* right of diagonal part */ 422 if (b->i[row + 1] - b->i[row] - l) { 423 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 424 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 425 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 431 { 432 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 433 PetscScalar value = 0.0; 434 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 435 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 436 PetscBool roworiented = aij->roworiented; 437 438 /* Some Variables required in the macro */ 439 Mat A = aij->A; 440 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 441 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 442 PetscBool ignorezeroentries = a->ignorezeroentries; 443 Mat B = aij->B; 444 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 445 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 446 MatScalar *aa, *ba; 447 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 448 PetscInt 
nonew; 449 MatScalar *ap1, *ap2; 450 451 PetscFunctionBegin; 452 PetscCall(MatSeqAIJGetArray(A, &aa)); 453 PetscCall(MatSeqAIJGetArray(B, &ba)); 454 for (i = 0; i < m; i++) { 455 if (im[i] < 0) continue; 456 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 lastcol1 = -1; 460 rp1 = aj + ai[row]; 461 ap1 = aa + ai[row]; 462 rmax1 = aimax[row]; 463 nrow1 = ailen[row]; 464 low1 = 0; 465 high1 = nrow1; 466 lastcol2 = -1; 467 rp2 = bj + bi[row]; 468 ap2 = ba + bi[row]; 469 rmax2 = bimax[row]; 470 nrow2 = bilen[row]; 471 low2 = 0; 472 high2 = nrow2; 473 474 for (j = 0; j < n; j++) { 475 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 476 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 477 if (in[j] >= cstart && in[j] < cend) { 478 col = in[j] - cstart; 479 nonew = a->nonew; 480 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 481 } else if (in[j] < 0) { 482 continue; 483 } else { 484 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 485 if (mat->was_assembled) { 486 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 487 #if defined(PETSC_USE_CTABLE) 488 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 489 col--; 490 #else 491 col = aij->colmap[in[j]] - 1; 492 #endif 493 if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 494 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 495 col = in[j]; 496 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 497 B = aij->B; 498 b = (Mat_SeqAIJ *)B->data; 499 bimax 
= b->imax; 500 bi = b->i; 501 bilen = b->ilen; 502 bj = b->j; 503 ba = b->a; 504 rp2 = bj + bi[row]; 505 ap2 = ba + bi[row]; 506 rmax2 = bimax[row]; 507 nrow2 = bilen[row]; 508 low2 = 0; 509 high2 = nrow2; 510 bm = aij->B->rmap->n; 511 ba = b->a; 512 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 513 if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) { 514 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 515 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 516 } 517 } else col = in[j]; 518 nonew = b->nonew; 519 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 520 } 521 } 522 } else { 523 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 524 if (!aij->donotstash) { 525 mat->assembled = PETSC_FALSE; 526 if (roworiented) { 527 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 528 } else { 529 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 530 } 531 } 532 } 533 } 534 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 535 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 536 PetscFunctionReturn(0); 537 } 538 539 /* 540 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 541 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
542 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 543 */ 544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 545 { 546 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 547 Mat A = aij->A; /* diagonal part of the matrix */ 548 Mat B = aij->B; /* offdiagonal part of the matrix */ 549 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 550 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 551 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 552 PetscInt *ailen = a->ilen, *aj = a->j; 553 PetscInt *bilen = b->ilen, *bj = b->j; 554 PetscInt am = aij->A->rmap->n, j; 555 PetscInt diag_so_far = 0, dnz; 556 PetscInt offd_so_far = 0, onz; 557 558 PetscFunctionBegin; 559 /* Iterate over all rows of the matrix */ 560 for (j = 0; j < am; j++) { 561 dnz = onz = 0; 562 /* Iterate over all non-zero columns of the current row */ 563 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 564 /* If column is in the diagonal */ 565 if (mat_j[col] >= cstart && mat_j[col] < cend) { 566 aj[diag_so_far++] = mat_j[col] - cstart; 567 dnz++; 568 } else { /* off-diagonal entries */ 569 bj[offd_so_far++] = mat_j[col]; 570 onz++; 571 } 572 } 573 ailen[j] = dnz; 574 bilen[j] = onz; 575 } 576 PetscFunctionReturn(0); 577 } 578 579 /* 580 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 581 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 582 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 583 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 584 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
585 */ 586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 587 { 588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 589 Mat A = aij->A; /* diagonal part of the matrix */ 590 Mat B = aij->B; /* offdiagonal part of the matrix */ 591 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 592 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 593 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 594 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 595 PetscInt *ailen = a->ilen, *aj = a->j; 596 PetscInt *bilen = b->ilen, *bj = b->j; 597 PetscInt am = aij->A->rmap->n, j; 598 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 599 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 600 PetscScalar *aa = a->a, *ba = b->a; 601 602 PetscFunctionBegin; 603 /* Iterate over all rows of the matrix */ 604 for (j = 0; j < am; j++) { 605 dnz_row = onz_row = 0; 606 rowstart_offd = full_offd_i[j]; 607 rowstart_diag = full_diag_i[j]; 608 /* Iterate over all non-zero columns of the current row */ 609 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 610 /* If column is in the diagonal */ 611 if (mat_j[col] >= cstart && mat_j[col] < cend) { 612 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 613 aa[rowstart_diag + dnz_row] = mat_a[col]; 614 dnz_row++; 615 } else { /* off-diagonal entries */ 616 bj[rowstart_offd + onz_row] = mat_j[col]; 617 ba[rowstart_offd + onz_row] = mat_a[col]; 618 onz_row++; 619 } 620 } 621 ailen[j] = dnz_row; 622 bilen[j] = onz_row; 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 628 { 629 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 630 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 631 PetscInt cstart = 
mat->cmap->rstart, cend = mat->cmap->rend, row, col; 632 633 PetscFunctionBegin; 634 for (i = 0; i < m; i++) { 635 if (idxm[i] < 0) continue; /* negative row */ 636 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 637 if (idxm[i] >= rstart && idxm[i] < rend) { 638 row = idxm[i] - rstart; 639 for (j = 0; j < n; j++) { 640 if (idxn[j] < 0) continue; /* negative column */ 641 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 642 if (idxn[j] >= cstart && idxn[j] < cend) { 643 col = idxn[j] - cstart; 644 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 645 } else { 646 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 647 #if defined(PETSC_USE_CTABLE) 648 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 649 col--; 650 #else 651 col = aij->colmap[idxn[j]] - 1; 652 #endif 653 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 654 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 655 } 656 } 657 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 658 } 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 663 { 664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 665 PetscInt nstash, reallocs; 666 667 PetscFunctionBegin; 668 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 669 670 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 671 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 672 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 673 PetscFunctionReturn(0); 674 } 675 676 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, 
MatAssemblyType mode) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 679 PetscMPIInt n; 680 PetscInt i, j, rstart, ncols, flg; 681 PetscInt *row, *col; 682 PetscBool other_disassembled; 683 PetscScalar *val; 684 685 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 686 687 PetscFunctionBegin; 688 if (!aij->donotstash && !mat->nooffprocentries) { 689 while (1) { 690 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 691 if (!flg) break; 692 693 for (i = 0; i < n;) { 694 /* Now identify the consecutive vals belonging to the same row */ 695 for (j = i, rstart = row[j]; j < n; j++) { 696 if (row[j] != rstart) break; 697 } 698 if (j < n) ncols = j - i; 699 else ncols = n - i; 700 /* Now assemble all these values with a single function call */ 701 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 702 i = j; 703 } 704 } 705 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 706 } 707 #if defined(PETSC_HAVE_DEVICE) 708 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 709 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 710 if (mat->boundtocpu) { 711 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 712 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 713 } 714 #endif 715 PetscCall(MatAssemblyBegin(aij->A, mode)); 716 PetscCall(MatAssemblyEnd(aij->A, mode)); 717 718 /* determine if any processor has disassembled, if so we must 719 also disassemble ourself, in order that we may reassemble. 
*/ 720 /* 721 if nonzero structure of submatrix B cannot change then we know that 722 no processor disassembled thus we can skip this stuff 723 */ 724 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 725 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 726 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 727 PetscCall(MatDisAssemble_MPIAIJ(mat)); 728 } 729 } 730 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 731 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 732 #if defined(PETSC_HAVE_DEVICE) 733 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 734 #endif 735 PetscCall(MatAssemblyBegin(aij->B, mode)); 736 PetscCall(MatAssemblyEnd(aij->B, mode)); 737 738 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 739 740 aij->rowvalues = NULL; 741 742 PetscCall(VecDestroy(&aij->diag)); 743 744 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 745 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 746 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 747 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 748 } 749 #if defined(PETSC_HAVE_DEVICE) 750 mat->offloadmask = PETSC_OFFLOAD_BOTH; 751 #endif 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 756 { 757 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 758 759 PetscFunctionBegin; 760 PetscCall(MatZeroEntries(l->A)); 761 PetscCall(MatZeroEntries(l->B)); 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, 
Vec b) 766 { 767 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 768 PetscObjectState sA, sB; 769 PetscInt *lrows; 770 PetscInt r, len; 771 PetscBool cong, lch, gch; 772 773 PetscFunctionBegin; 774 /* get locally owned rows */ 775 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 776 PetscCall(MatHasCongruentLayouts(A, &cong)); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 783 PetscCall(VecGetArrayRead(x, &xx)); 784 PetscCall(VecGetArray(b, &bb)); 785 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 786 PetscCall(VecRestoreArrayRead(x, &xx)); 787 PetscCall(VecRestoreArray(b, &bb)); 788 } 789 790 sA = mat->A->nonzerostate; 791 sB = mat->B->nonzerostate; 792 793 if (diag != 0.0 && cong) { 794 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 795 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 796 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 797 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 798 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 799 PetscInt nnwA, nnwB; 800 PetscBool nnzA, nnzB; 801 802 nnwA = aijA->nonew; 803 nnwB = aijB->nonew; 804 nnzA = aijA->keepnonzeropattern; 805 nnzB = aijB->keepnonzeropattern; 806 if (!nnzA) { 807 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 808 aijA->nonew = 0; 809 } 810 if (!nnzB) { 811 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 812 aijB->nonew = 0; 813 } 814 /* Must zero here before the next loop */ 815 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 816 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 817 for 
(r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart; /* global index of the zeroed local row */
      if (row >= A->cmap->N) continue;                 /* non-square: no diagonal entry for this row */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original "no new nonzero" policy on both blocks */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: simply zero the rows in both the diagonal and off-diagonal blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the parallel state only if some process changed a block's pattern */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/*
   Zero the (possibly off-process) rows listed in rows[] AND the matching columns, optionally
   putting diag on the diagonal; when x and b are given, b is adjusted so x remains a solution
   for the zeroed equations. A PetscSF routes each requested global row to its owning process;
   a scattered mask vector (lmask) marks which ghost columns of the off-diagonal block B must
   be eliminated.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 = "this local row not requested" until the SF reduce says otherwise */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p]; /* local row number on the owning process */
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; MPI_LOR leaves any nonnegative value at requested rows */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers (lrows becomes the sorted list of local rows to zero) */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix: build a 0/1 mask of zeroed rows and scatter it to the
     ghost layout so each process learns which of its ghost columns are being eliminated */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb)); /* NOTE: bb is reused here as the mask array, not the rhs */
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are stored; ridx maps back to true row index */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this column is being zeroed */
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move the known contribution to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* y = A x: the scatter of ghost values of x is overlapped with the diagonal-block product */
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx,
a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy)); /* add off-diagonal contribution using the freshly scattered ghost values */
  PetscFunctionReturn(0);
}

/* Apply only the on-process (diagonal) block of A: xx = diag(A) block applied to bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(0);
}

/* zz = yy + A xx; same communication pattern as MatMult_MPIAIJ (scatter overlapped with local product) */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}

/* yy = A^T xx: B^T produces contributions destined for other processes, which are
   accumulated into yy with a reverse-mode scatter */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals Amat^T to tolerance tol; the result is reduced so *f agrees on all ranks */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0); /* some rank's diagonal block already failed: done */
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0); /* sequential: there is no off-diagonal block to check */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all column indices outside this rank's ownership range [first,last) */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* extract A(Me,Notme) and B(Notme,Me); they must be transposes of each other */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

/* A matrix is symmetric iff it equals its own transpose (to tolerance tol) */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(0);
}

/* zz = yy + A^T xx; same reverse-scatter accumulation pattern as MatMultTranspose_MPIAIJ */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v)); /* the global diagonal lives entirely in the diagonal block */
  PetscFunctionReturn(0);
}

/* Scale the entire matrix by aa: both diagonal and off-diagonal blocks are scaled */
PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* first-pass COO maps/permutations (local entries) */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* second-pass COO maps/permutations (remote entries) */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy all storage owned by an MPIAIJ matrix and detach every composed method/function */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* detach every composed function so stale pointers cannot be called on a retyped object */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); /* NOTE(review): duplicate of the earlier MatConvert_mpiaij_is_C reset; harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(0);
}

/*
   Write the parallel matrix in PETSc binary format: header (classid, M, N, global nz),
   per-row lengths, then global column indices and values with each local row's entries
   emitted in ascending global column order (off-diagonal entries left of the diagonal
   block, then diagonal block, then the rest).
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header; global nz is the sum of the local counts reduced to rank 0 */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices, merging B (left of diagonal block), A, then B (right) */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break; /* remaining off-diagonal columns lie right of the diagonal block */
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc
error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same merged order as the column indices above */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/*
   View a parallel AIJ matrix on ASCII, draw, binary, or socket viewers. Info-style ASCII
   formats are handled per-rank; otherwise the whole matrix is gathered onto rank 0 and
   viewed as a sequential matrix.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format ==
PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary: local sizes, nz counts, memory, inode usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0); /* nothing to print for factor info */
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols; the other ranks request an empty stride */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Dispatch MatView for MPIAIJ to the ASCII/draw/binary/socket implementation */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(0);
}

/*
   Parallel (processor-local) SOR: each outer iteration scatters the current ghost values of xx,
   forms bb1 = bb - B*x (off-process coupling moved to the right-hand side), then runs the
   requested local sweep on the diagonal block. Only the SOR_LOCAL_* variants and Eisenstat
   are supported in parallel.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever an off-process correction of the rhs will be formed */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with zero guess needs no ghost values of xx */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal, used when no fast diagonal-block multiply is available */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   B = P * A * Q^T for row permutation rowp and column permutation colp. The permutations are
   inverted with PetscSF reductions to learn each local row/column's destination, the new
   diagonal/off-diagonal nonzero counts are computed for preallocation, and the entries are
   inserted with MatSetValues under the permuted indices.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count permuted diagonal/off-diagonal nonzeros per local row, then forward them to the
     rows' destination processes for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp)); /* NOTE(review): parcolp is never set in this function; dead guard — confirm against history */
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Return the number of ghost (off-process) columns and, optionally, their global indices */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Mat         A = mat->A, B
= mat->B; 1628 PetscLogDouble isend[5], irecv[5]; 1629 1630 PetscFunctionBegin; 1631 info->block_size = 1.0; 1632 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1633 1634 isend[0] = info->nz_used; 1635 isend[1] = info->nz_allocated; 1636 isend[2] = info->nz_unneeded; 1637 isend[3] = info->memory; 1638 isend[4] = info->mallocs; 1639 1640 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1641 1642 isend[0] += info->nz_used; 1643 isend[1] += info->nz_allocated; 1644 isend[2] += info->nz_unneeded; 1645 isend[3] += info->memory; 1646 isend[4] += info->mallocs; 1647 if (flag == MAT_LOCAL) { 1648 info->nz_used = isend[0]; 1649 info->nz_allocated = isend[1]; 1650 info->nz_unneeded = isend[2]; 1651 info->memory = isend[3]; 1652 info->mallocs = isend[4]; 1653 } else if (flag == MAT_GLOBAL_MAX) { 1654 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1655 1656 info->nz_used = irecv[0]; 1657 info->nz_allocated = irecv[1]; 1658 info->nz_unneeded = irecv[2]; 1659 info->memory = irecv[3]; 1660 info->mallocs = irecv[4]; 1661 } else if (flag == MAT_GLOBAL_SUM) { 1662 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } 1670 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1671 info->fill_ratio_needed = 0; 1672 info->factor_mallocs = 0; 1673 PetscFunctionReturn(0); 1674 } 1675 1676 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1677 { 1678 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1679 1680 PetscFunctionBegin; 1681 switch (op) { 1682 case MAT_NEW_NONZERO_LOCATIONS: 1683 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1684 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1685 case MAT_KEEP_NONZERO_PATTERN: 1686 case MAT_NEW_NONZERO_LOCATION_ERR: 1687 case MAT_USE_INODES: 1688 case 
MAT_IGNORE_ZERO_ENTRIES: 1689 case MAT_FORM_EXPLICIT_TRANSPOSE: 1690 MatCheckPreallocated(A, 1); 1691 PetscCall(MatSetOption(a->A, op, flg)); 1692 PetscCall(MatSetOption(a->B, op, flg)); 1693 break; 1694 case MAT_ROW_ORIENTED: 1695 MatCheckPreallocated(A, 1); 1696 a->roworiented = flg; 1697 1698 PetscCall(MatSetOption(a->A, op, flg)); 1699 PetscCall(MatSetOption(a->B, op, flg)); 1700 break; 1701 case MAT_FORCE_DIAGONAL_ENTRIES: 1702 case MAT_SORTED_FULL: 1703 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1704 break; 1705 case MAT_IGNORE_OFF_PROC_ENTRIES: 1706 a->donotstash = flg; 1707 break; 1708 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1709 case MAT_SPD: 1710 case MAT_SYMMETRIC: 1711 case MAT_STRUCTURALLY_SYMMETRIC: 1712 case MAT_HERMITIAN: 1713 case MAT_SYMMETRY_ETERNAL: 1714 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1715 case MAT_SPD_ETERNAL: 1716 /* if the diagonal matrix is square it inherits some of the properties above */ 1717 break; 1718 case MAT_SUBMAT_SINGLEIS: 1719 A->submat_singleis = flg; 1720 break; 1721 case MAT_STRUCTURE_ONLY: 1722 /* The option is handled directly by MatSetOption() */ 1723 break; 1724 default: 1725 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1731 { 1732 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1733 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1734 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1735 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1736 PetscInt *cmap, *idx_p; 1737 1738 PetscFunctionBegin; 1739 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1740 mat->getrowactive = PETSC_TRUE; 1741 1742 if (!mat->rowvalues && (idx || v)) { 1743 /* 1744 allocate enough 
space to hold information from the longest row. 1745 */ 1746 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1747 PetscInt max = 1, tmp; 1748 for (i = 0; i < matin->rmap->n; i++) { 1749 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1750 if (max < tmp) max = tmp; 1751 } 1752 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1753 } 1754 1755 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1756 lrow = row - rstart; 1757 1758 pvA = &vworkA; 1759 pcA = &cworkA; 1760 pvB = &vworkB; 1761 pcB = &cworkB; 1762 if (!v) { 1763 pvA = NULL; 1764 pvB = NULL; 1765 } 1766 if (!idx) { 1767 pcA = NULL; 1768 if (!v) pcB = NULL; 1769 } 1770 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1771 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1772 nztot = nzA + nzB; 1773 1774 cmap = mat->garray; 1775 if (v || idx) { 1776 if (nztot) { 1777 /* Sort by increasing column numbers, assuming A and B already sorted */ 1778 PetscInt imark = -1; 1779 if (v) { 1780 *v = v_p = mat->rowvalues; 1781 for (i = 0; i < nzB; i++) { 1782 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1783 else break; 1784 } 1785 imark = i; 1786 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1787 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1788 } 1789 if (idx) { 1790 *idx = idx_p = mat->rowindices; 1791 if (imark > -1) { 1792 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1793 } else { 1794 for (i = 0; i < nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1801 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1810 
PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1828 PetscInt i, j, cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v, *amata, *bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A, type, norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i = 0; i < amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v) * (*v)); 1842 v++; 1843 } 1844 v = bmata; 1845 for (i = 0; i < bmat->nz; i++) { 1846 sum += PetscRealPart(PetscConj(*v) * (*v)); 1847 v++; 1848 } 1849 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1850 *norm = PetscSqrtReal(*norm); 1851 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1852 } else if (type == NORM_1) { /* max column norm */ 1853 PetscReal *tmp, *tmp2; 1854 PetscInt *jj, *garray = aij->garray; 1855 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1856 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1857 *norm = 0.0; 1858 v = amata; 1859 jj = amat->j; 1860 for (j = 0; j < amat->nz; j++) { 1861 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1862 v++; 1863 } 1864 v = bmata; 1865 jj = bmat->j; 1866 for (j = 0; j < bmat->nz; j++) 
{ 1867 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1868 v++; 1869 } 1870 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1871 for (j = 0; j < mat->cmap->N; j++) { 1872 if (tmp2[j] > *norm) *norm = tmp2[j]; 1873 } 1874 PetscCall(PetscFree(tmp)); 1875 PetscCall(PetscFree(tmp2)); 1876 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1877 } else if (type == NORM_INFINITY) { /* max row norm */ 1878 PetscReal ntemp = 0.0; 1879 for (j = 0; j < aij->A->rmap->n; j++) { 1880 v = amata + amat->i[j]; 1881 sum = 0.0; 1882 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1883 sum += PetscAbsScalar(*v); 1884 v++; 1885 } 1886 v = bmata + bmat->i[j]; 1887 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1888 sum += PetscAbsScalar(*v); 1889 v++; 1890 } 1891 if (sum > ntemp) ntemp = sum; 1892 } 1893 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1894 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1895 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1896 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1897 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1903 { 1904 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1905 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1906 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1907 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1908 Mat B, A_diag, *B_diag; 1909 const MatScalar *pbv, *bv; 1910 1911 PetscFunctionBegin; 1912 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1913 ma = A->rmap->n; 1914 na = A->cmap->n; 1915 mb = a->B->rmap->n; 1916 nb = a->B->cmap->n; 1917 ai = 
Aloc->i; 1918 aj = Aloc->j; 1919 bi = Bloc->i; 1920 bj = Bloc->j; 1921 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1922 PetscInt *d_nnz, *g_nnz, *o_nnz; 1923 PetscSFNode *oloc; 1924 PETSC_UNUSED PetscSF sf; 1925 1926 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1927 /* compute d_nnz for preallocation */ 1928 PetscCall(PetscArrayzero(d_nnz, na)); 1929 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1930 /* compute local off-diagonal contributions */ 1931 PetscCall(PetscArrayzero(g_nnz, nb)); 1932 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1933 /* map those to global */ 1934 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1935 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1936 PetscCall(PetscSFSetFromOptions(sf)); 1937 PetscCall(PetscArrayzero(o_nnz, na)); 1938 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1939 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1940 PetscCall(PetscSFDestroy(&sf)); 1941 1942 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1943 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1944 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1945 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1946 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1947 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1948 } else { 1949 B = *matout; 1950 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1951 } 1952 1953 b = (Mat_MPIAIJ *)B->data; 1954 A_diag = a->A; 1955 B_diag = &b->A; 1956 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1957 A_diag_ncol = A_diag->cmap->N; 1958 B_diag_ilen = sub_B_diag->ilen; 1959 B_diag_i = sub_B_diag->i; 1960 1961 /* Set ilen for diagonal of B */ 1962 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1963 1964 /* Transpose the diagonal part of the matrix. 
In contrast to the offdiagonal part, this can be done 1965 very quickly (=without using MatSetValues), because all writes are local. */ 1966 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1967 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1968 1969 /* copy over the B part */ 1970 PetscCall(PetscMalloc1(bi[mb], &cols)); 1971 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1972 pbv = bv; 1973 row = A->rmap->rstart; 1974 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1975 cols_tmp = cols; 1976 for (i = 0; i < mb; i++) { 1977 ncol = bi[i + 1] - bi[i]; 1978 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1979 row++; 1980 pbv += ncol; 1981 cols_tmp += ncol; 1982 } 1983 PetscCall(PetscFree(cols)); 1984 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1985 1986 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1987 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1988 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1989 *matout = B; 1990 } else { 1991 PetscCall(MatHeaderMerge(A, &B)); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1997 { 1998 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1999 Mat a = aij->A, b = aij->B; 2000 PetscInt s1, s2, s3; 2001 2002 PetscFunctionBegin; 2003 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 2004 if (rr) { 2005 PetscCall(VecGetLocalSize(rr, &s1)); 2006 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2007 /* Overlap communication with computation. 
*/ 2008 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2009 } 2010 if (ll) { 2011 PetscCall(VecGetLocalSize(ll, &s1)); 2012 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2013 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2014 } 2015 /* scale the diagonal block */ 2016 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2017 2018 if (rr) { 2019 /* Do a scatter end and then right scale the off-diagonal block */ 2020 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2021 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2027 { 2028 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2029 2030 PetscFunctionBegin; 2031 PetscCall(MatSetUnfactored(a->A)); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2036 { 2037 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2038 Mat a, b, c, d; 2039 PetscBool flg; 2040 2041 PetscFunctionBegin; 2042 a = matA->A; 2043 b = matA->B; 2044 c = matB->A; 2045 d = matB->B; 2046 2047 PetscCall(MatEqual(a, c, &flg)); 2048 if (flg) PetscCall(MatEqual(b, d, &flg)); 2049 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2054 { 2055 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2056 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2057 2058 PetscFunctionBegin; 2059 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2060 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2061 /* because of the column compression in the off-processor part of the matrix a->B, 2062 the number of columns in a->B and b->B may be different, hence we cannot call 2063 the MatCopy() directly on the two parts. If need be, we can provide a more 2064 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2065 then copying the submatrices */ 2066 PetscCall(MatCopy_Basic(A, B, str)); 2067 } else { 2068 PetscCall(MatCopy(a->A, b->A, str)); 2069 PetscCall(MatCopy(a->B, b->B, str)); 2070 } 2071 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2076 { 2077 PetscFunctionBegin; 2078 PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 /* 2083 Computes the number of nonzeros per row needed for preallocation when X and Y 2084 have different nonzero structure. 
2085 */ 2086 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2087 { 2088 PetscInt i, j, k, nzx, nzy; 2089 2090 PetscFunctionBegin; 2091 /* Set the number of nonzeros in the new matrix */ 2092 for (i = 0; i < m; i++) { 2093 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2094 nzx = xi[i + 1] - xi[i]; 2095 nzy = yi[i + 1] - yi[i]; 2096 nnz[i] = 0; 2097 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2098 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2099 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2100 nnz[i]++; 2101 } 2102 for (; k < nzy; k++) nnz[i]++; 2103 } 2104 PetscFunctionReturn(0); 2105 } 2106 2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2109 { 2110 PetscInt m = Y->rmap->N; 2111 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2112 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2113 2114 PetscFunctionBegin; 2115 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2120 { 2121 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2122 2123 PetscFunctionBegin; 2124 if (str == SAME_NONZERO_PATTERN) { 2125 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2126 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2127 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2128 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2129 } else { 2130 Mat B; 2131 PetscInt *nnz_d, *nnz_o; 2132 2133 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2134 PetscCall(PetscMalloc1(yy->B->rmap->N, 
&nnz_o)); 2135 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2136 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2137 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2138 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2139 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2140 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2141 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2142 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2143 PetscCall(MatHeaderMerge(Y, &B)); 2144 PetscCall(PetscFree(nnz_d)); 2145 PetscCall(PetscFree(nnz_o)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2151 2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2153 { 2154 PetscFunctionBegin; 2155 if (PetscDefined(USE_COMPLEX)) { 2156 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2157 2158 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2159 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2160 } 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2165 { 2166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2167 2168 PetscFunctionBegin; 2169 PetscCall(MatRealPart(a->A)); 2170 PetscCall(MatRealPart(a->B)); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2175 { 2176 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2177 2178 PetscFunctionBegin; 2179 PetscCall(MatImaginaryPart(a->A)); 2180 PetscCall(MatImaginaryPart(a->B)); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2185 { 2186 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2187 PetscInt i, *idxb = NULL, m = A->rmap->n; 2188 PetscScalar *va, *vv; 2189 Vec vB, vA; 2190 const PetscScalar *vb; 2191 2192 PetscFunctionBegin; 2193 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2194 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2195 2196 PetscCall(VecGetArrayWrite(vA, &va)); 
2197 if (idx) { 2198 for (i = 0; i < m; i++) { 2199 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2200 } 2201 } 2202 2203 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2204 PetscCall(PetscMalloc1(m, &idxb)); 2205 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2206 2207 PetscCall(VecGetArrayWrite(v, &vv)); 2208 PetscCall(VecGetArrayRead(vB, &vb)); 2209 for (i = 0; i < m; i++) { 2210 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2211 vv[i] = vb[i]; 2212 if (idx) idx[i] = a->garray[idxb[i]]; 2213 } else { 2214 vv[i] = va[i]; 2215 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2216 } 2217 } 2218 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2219 PetscCall(VecRestoreArrayWrite(vA, &va)); 2220 PetscCall(VecRestoreArrayRead(vB, &vb)); 2221 PetscCall(PetscFree(idxb)); 2222 PetscCall(VecDestroy(&vA)); 2223 PetscCall(VecDestroy(&vB)); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2228 { 2229 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2230 PetscInt m = A->rmap->n, n = A->cmap->n; 2231 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2232 PetscInt *cmap = mat->garray; 2233 PetscInt *diagIdx, *offdiagIdx; 2234 Vec diagV, offdiagV; 2235 PetscScalar *a, *diagA, *offdiagA; 2236 const PetscScalar *ba, *bav; 2237 PetscInt r, j, col, ncols, *bi, *bj; 2238 Mat B = mat->B; 2239 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2240 2241 PetscFunctionBegin; 2242 /* When a process holds entire A and other processes have no entry */ 2243 if (A->cmap->N == n) { 2244 PetscCall(VecGetArrayWrite(v, &diagA)); 2245 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2246 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2247 PetscCall(VecDestroy(&diagV)); 2248 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2249 PetscFunctionReturn(0); 2250 } else if (n == 0) { 2251 if (m) { 2252 PetscCall(VecGetArrayWrite(v, &a)); 2253 
for (r = 0; r < m; r++) { 2254 a[r] = 0.0; 2255 if (idx) idx[r] = -1; 2256 } 2257 PetscCall(VecRestoreArrayWrite(v, &a)); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2263 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2264 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2265 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2266 2267 /* Get offdiagIdx[] for implicit 0.0 */ 2268 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2269 ba = bav; 2270 bi = b->i; 2271 bj = b->j; 2272 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2273 for (r = 0; r < m; r++) { 2274 ncols = bi[r + 1] - bi[r]; 2275 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2276 offdiagA[r] = *ba; 2277 offdiagIdx[r] = cmap[0]; 2278 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2279 offdiagA[r] = 0.0; 2280 2281 /* Find first hole in the cmap */ 2282 for (j = 0; j < ncols; j++) { 2283 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2284 if (col > j && j < cstart) { 2285 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2286 break; 2287 } else if (col > j + n && j >= cstart) { 2288 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2289 break; 2290 } 2291 } 2292 if (j == ncols && ncols < A->cmap->N - n) { 2293 /* a hole is outside compressed Bcols */ 2294 if (ncols == 0) { 2295 if (cstart) { 2296 offdiagIdx[r] = 0; 2297 } else offdiagIdx[r] = cend; 2298 } else { /* ncols > 0 */ 2299 offdiagIdx[r] = cmap[ncols - 1] + 1; 2300 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2301 } 2302 } 2303 } 2304 2305 for (j = 0; j < ncols; j++) { 2306 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2307 offdiagA[r] = *ba; 2308 offdiagIdx[r] = cmap[*bj]; 2309 } 2310 ba++; 2311 bj++; 2312 } 2313 } 2314 2315 PetscCall(VecGetArrayWrite(v, &a)); 2316 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2317 for (r = 0; r < m; ++r) { 2318 if 
(PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2319 a[r] = diagA[r]; 2320 if (idx) idx[r] = cstart + diagIdx[r]; 2321 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2322 a[r] = diagA[r]; 2323 if (idx) { 2324 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2325 idx[r] = cstart + diagIdx[r]; 2326 } else idx[r] = offdiagIdx[r]; 2327 } 2328 } else { 2329 a[r] = offdiagA[r]; 2330 if (idx) idx[r] = offdiagIdx[r]; 2331 } 2332 } 2333 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2334 PetscCall(VecRestoreArrayWrite(v, &a)); 2335 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2336 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2337 PetscCall(VecDestroy(&diagV)); 2338 PetscCall(VecDestroy(&offdiagV)); 2339 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2346 PetscInt m = A->rmap->n, n = A->cmap->n; 2347 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2348 PetscInt *cmap = mat->garray; 2349 PetscInt *diagIdx, *offdiagIdx; 2350 Vec diagV, offdiagV; 2351 PetscScalar *a, *diagA, *offdiagA; 2352 const PetscScalar *ba, *bav; 2353 PetscInt r, j, col, ncols, *bi, *bj; 2354 Mat B = mat->B; 2355 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2356 2357 PetscFunctionBegin; 2358 /* When a process holds entire A and other processes have no entry */ 2359 if (A->cmap->N == n) { 2360 PetscCall(VecGetArrayWrite(v, &diagA)); 2361 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2362 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2363 PetscCall(VecDestroy(&diagV)); 2364 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2365 PetscFunctionReturn(0); 2366 } else if (n == 0) { 2367 if (m) { 2368 PetscCall(VecGetArrayWrite(v, &a)); 2369 for (r = 0; r < m; r++) { 2370 a[r] = PETSC_MAX_REAL; 2371 if (idx) idx[r] = -1; 2372 } 2373 PetscCall(VecRestoreArrayWrite(v, &a)); 
    }
    PetscFunctionReturn(0);
  }

  /* Work space: per-row min value and argmin for the diagonal (A) and off-diagonal (B) parts.
     NOTE(review): this routine uses PetscCalloc2 while MatGetRowMax_MPIAIJ below uses PetscMalloc2
     for the same arrays — presumably an inconsistency rather than intentional; confirm upstream. */
  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* Minimum over the diagonal block, computed by the sequential implementation */
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0: the off-diagonal block is stored compressed (only nonzero
     columns appear), so a row that is not dense implicitly contains 0.0 entries that participate
     in the min/argmin. */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the global column of the first implicit 0.0.
         NOTE(review): the tests compare the local running index j against the global column
         start cstart; this mirrors the logic in MatGetRowMax_MPIAIJ below, but the mixing of
         local and global indices looks fragile — verify against upstream before changing. */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row and keep the smallest (ba/bj advance across rows) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine the diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* Computes, for each local row, the maximum entry of the row and (optionally) its global column
   index. Implicit zeros in the compressed off-diagonal block count as candidate entries.
   v receives the per-row maxima; idx (may be NULL) receives the argmax global columns. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps B's compressed local columns to global columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything lives in the diagonal block: wrap v's array and delegate to the SeqAIJ kernel */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* No local columns at all: every row max is -inf with an invalid index */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* Maximum over the diagonal block, computed by the sequential implementation */
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 (same hole-finding scheme as MatGetRowMin_MPIAIJ above) */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this B row and keep the largest */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Combine diagonal-block and off-diagonal-block maxima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* Returns a sequential matrix with the nonzero structure (no values) of the whole parallel matrix.
   Ownership of the single Mat created by MatCreateSubMatrix_MPIAIJ_All() transfers to *newmat;
   only the array wrapper is freed here. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(0);
}

/* Delegates block-diagonal inversion to the diagonal (sequential) block and propagates any
   factorization-error state back to the parallel matrix. */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Fills the matrix with random values. For an unassembled (but preallocated) matrix, entries of the
   off-diagonal block that would fall in the local column range are skipped so they end up in the
   diagonal block on assembly. Ends with a full assembly. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
/* Implementation behind MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the increaseoverlap
   function pointer between the scalable and default algorithms. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros stored on this rank (diagonal plus off-diagonal block)

  Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* i[local rows] is the CSR row-offset past the last row, i.e. the local nonzero count */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective on A

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* No-op for matrix types that do not compose the _C method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}

/* Processes MPIAIJ-specific options; currently only the scalable-overlap toggle. */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* Default the option to the matrix's current setting */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I. Ensures the diagonal block has at least one slot per row preallocated before
   delegating to MatShift_Basic(). */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the new-nonzero-error flag across repreallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

/* Reports whether any diagonal entry is structurally missing; d (optional) returns the global
   index of the first missing diagonal. Square matrices only. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    /* Translate the diagonal block's local index to a global one */
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart;
  }
  PetscFunctionReturn(0);
}

/* Variable-block-diagonal inversion, delegated to the diagonal (sequential) block. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(0);
}

/* Removes stored zero entries from both local blocks. */
PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros(a->A));
  PetscCall(MatEliminateZeros(a->B));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps
  /* Virtual-function table for MATMPIAIJ; slot numbers follow struct _MatOps in petsc/private/matimpl.h.
     NULL slots fall back to the generic Mat implementation (or are unsupported). */
  MatOps_Values = {MatSetValues_MPIAIJ,
                   MatGetRow_MPIAIJ,
                   MatRestoreRow_MPIAIJ,
                   MatMult_MPIAIJ,
                   /* 4*/ MatMultAdd_MPIAIJ,
                   MatMultTranspose_MPIAIJ,
                   MatMultTransposeAdd_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   /*10*/ NULL,
                   NULL,
                   NULL,
                   MatSOR_MPIAIJ,
                   MatTranspose_MPIAIJ,
                   /*15*/ MatGetInfo_MPIAIJ,
                   MatEqual_MPIAIJ,
                   MatGetDiagonal_MPIAIJ,
                   MatDiagonalScale_MPIAIJ,
                   MatNorm_MPIAIJ,
                   /*20*/ MatAssemblyBegin_MPIAIJ,
                   MatAssemblyEnd_MPIAIJ,
                   MatSetOption_MPIAIJ,
                   MatZeroEntries_MPIAIJ,
                   /*24*/ MatZeroRows_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   /*29*/ MatSetUp_MPIAIJ,
                   NULL,
                   NULL,
                   MatGetDiagonalBlock_MPIAIJ,
                   NULL,
                   /*34*/ MatDuplicate_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   /*39*/ MatAXPY_MPIAIJ,
                   MatCreateSubMatrices_MPIAIJ,
                   MatIncreaseOverlap_MPIAIJ,
                   MatGetValues_MPIAIJ,
                   MatCopy_MPIAIJ,
                   /*44*/ MatGetRowMax_MPIAIJ,
                   MatScale_MPIAIJ,
                   MatShift_MPIAIJ,
                   MatDiagonalSet_MPIAIJ,
                   MatZeroRowsColumns_MPIAIJ,
                   /*49*/ MatSetRandom_MPIAIJ,
                   MatGetRowIJ_MPIAIJ,
                   MatRestoreRowIJ_MPIAIJ,
                   NULL,
                   NULL,
                   /*54*/ MatFDColoringCreate_MPIXAIJ,
                   NULL,
                   MatSetUnfactored_MPIAIJ,
                   MatPermute_MPIAIJ,
                   NULL,
                   /*59*/ MatCreateSubMatrix_MPIAIJ,
                   MatDestroy_MPIAIJ,
                   MatView_MPIAIJ,
                   NULL,
                   NULL,
                   /*64*/ NULL,
                   MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   /*69*/ MatGetRowMaxAbs_MPIAIJ,
                   MatGetRowMinAbs_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   /*75*/ MatFDColoringApply_AIJ,
                   MatSetFromOptions_MPIAIJ,
                   NULL,
                   NULL,
                   MatFindZeroDiagonals_MPIAIJ,
                   /*80*/ NULL,
                   NULL,
                   NULL,
                   /*83*/ MatLoad_MPIAIJ,
                   MatIsSymmetric_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   /*89*/ NULL,
                   NULL,
                   MatMatMultNumeric_MPIAIJ_MPIAIJ,
                   NULL,
                   NULL,
                   /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   MatBindToCPU_MPIAIJ,
                   /*99*/ MatProductSetFromOptions_MPIAIJ,
                   NULL,
                   NULL,
                   MatConjugate_MPIAIJ,
                   NULL,
                   /*104*/ MatSetValuesRow_MPIAIJ,
                   MatRealPart_MPIAIJ,
                   MatImaginaryPart_MPIAIJ,
                   NULL,
                   NULL,
                   /*109*/ NULL,
                   NULL,
                   MatGetRowMin_MPIAIJ,
                   NULL,
                   MatMissingDiagonal_MPIAIJ,
                   /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                   NULL,
                   MatGetGhosts_MPIAIJ,
                   NULL,
                   NULL,
                   /*119*/ MatMultDiagonalBlock_MPIAIJ,
                   NULL,
                   NULL,
                   NULL,
                   MatGetMultiProcBlock_MPIAIJ,
                   /*124*/ MatFindNonzeroRows_MPIAIJ,
                   MatGetColumnReductions_MPIAIJ,
                   MatInvertBlockDiagonal_MPIAIJ,
                   MatInvertVariableBlockDiagonal_MPIAIJ,
                   MatCreateSubMatricesMPI_MPIAIJ,
                   /*129*/ NULL,
                   NULL,
                   NULL,
                   MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                   NULL,
                   /*134*/ NULL,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   /*139*/ MatSetBlockSizes_MPIAIJ,
                   NULL,
                   NULL,
                   MatFDColoringSetUp_MPIXAIJ,
                   MatFindOffBlockDiagonalEntries_MPIAIJ,
                   MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                   /*145*/ NULL,
                   NULL,
                   NULL,
                   MatCreateGraph_Simple_AIJ,
                   NULL,
                   /*150*/ NULL,
                   MatEliminateZeros_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

/* Snapshots the current numerical values of both local blocks (see MatStoreValues()). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restores the values previously saved by MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,
                                                PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* Discard any existing column map / communication machinery; it is rebuilt on assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* On a single rank there is no off-diagonal part, so B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* The diagonal block is created only once; repeated preallocation reuses it */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Resets preallocation of both local blocks, dropping assembled state and the (stale) column
   map and scatter, which are rebuilt on the next assembly. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicates a (possibly unassembled but preallocated) MPIAIJ matrix, deep-copying the column
   map, garray, local blocks, and scatter context. cpvalues controls value copying per MatDuplicate(). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* Per-object scratch state for MatGetRow is not shared with the original */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* Deep-copy the global-to-local column map (hash table or dense array build) */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 keeps the allocation valid for len == 0 */
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray =
    NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  /* Carry over composed methods (e.g. the *_C functions) to the duplicate */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads a matrix from a viewer; dispatches to the binary or (if built) HDF5 reader. */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Reads a matrix in PETSc binary format: a 4-int header (classid, M, N, nz), per-row lengths,
   then column indices and values; each rank reads its own row slice and preallocates via CSR. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3]; /* negative nz marks a special on-disk format this reader does not handle */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix-sum into local CSR offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* Sanity check: the sum of all local row lengths must equal the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); /* mlen is the range end here */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* All ranks must agree that iscol is exactly their local column range */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    /* Gather the full (global-size) column index set; carry over the block size */
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec                x, cmap;        /* marker and submatrix-column-number vectors over mat's columns */
  const PetscInt    *is_idx;
  PetscScalar       *xarray, *cmaparray;
  PetscInt           ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)mat->data;
  Mat                B = a->B;
  Vec                lvec = a->lvec, lcmap; /* ghosted local vectors matching B's columns */
  PetscInt           i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm           comm;
  VecScatter         Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of local iscol sizes across ranks */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries whose marker survived the scatter (> -1) are selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and frees */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed there on the initial call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3272 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3273 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3274 3275 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3276 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3277 3278 /* Update diagonal and off-diagonal portions of submat */ 3279 asub = (Mat_MPIAIJ *)(*submat)->data; 3280 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3281 PetscCall(ISGetLocalSize(iscol_o, &n)); 3282 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3283 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3284 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3285 3286 } else { /* call == MAT_INITIAL_MATRIX) */ 3287 const PetscInt *garray; 3288 PetscInt BsubN; 3289 3290 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3291 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3292 3293 /* Create local submatrices Asub and Bsub */ 3294 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3295 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3296 3297 /* Create submatrix M */ 3298 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3299 3300 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3301 asub = (Mat_MPIAIJ *)M->data; 3302 3303 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3304 n = asub->B->cmap->N; 3305 if (BsubN > n) { 3306 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3307 const PetscInt *idx; 3308 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3309 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3310 3311 PetscCall(PetscMalloc1(n, &idx_new)); 3312 j = 0; 3313 PetscCall(ISGetIndices(iscol_o, &idx)); 3314 for (i = 0; i < n; i++) { 3315 if (j >= BsubN) break; 3316 while (subgarray[i] > garray[j]) j++; 3317 3318 if (subgarray[i] == garray[j]) { 3319 idx_new[i] = idx[j++]; 3320 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3321 } 3322 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3323 3324 PetscCall(ISDestroy(&iscol_o)); 3325 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3326 3327 } else if (BsubN < n) { 3328 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3329 } 3330 3331 PetscCall(PetscFree(garray)); 3332 *submat = M; 3333 3334 /* Save isrow_d, 
/*
   MatCreateSubMatrix_MPIAIJ - dispatcher for extracting a parallel submatrix mat[isrow, iscol].

   Chooses the cheapest available algorithm:
     - same row AND column distribution as mat  -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
     - same row distribution only (sorted cols) -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
     - otherwise                                -> MatCreateSubMatrix_MPIAIJ_nonscalable()
       (gathers iscol to a sequential IS of global size; memory-nonscalable fallback)

   For MAT_REUSE_MATRIX the algorithm used on the initial call is recovered from the
   IS objects composed on *newmat ("isrow_d", "SubIScol", "ISAllGather"), so the same
   branch is taken again without re-doing the collective distribution checks.

   NOTE(review): all ranks must reach the MPIU_Allreduce() in the initial-matrix path;
   the per-rank checks before it are purely local.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Recover which algorithm produced *newmat from the objects composed on it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE; /* empty local IS trivially matches any distribution */
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      /* all locally requested rows lie in this rank's ownership range */
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* collective agreement: the fast paths are valid only if EVERY rank matches */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general (nonscalable) path below,
           reusing the iscol_local already built here */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* hand iscol_local over to *newmat so MAT_REUSE_MATRIX can find it later */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of B columns

  Output Parameter:
. mat - the matrix, with input A as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of local diagonal-block widths across ranks */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; *mat takes over A's reference */
  maij->A = A;

  /* Translate B's local column indices to global indices in place, using garray.
     NOTE(review): assumes every local column index in oj is a valid index into garray. */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; it aliases B's i/j/a arrays rather than copying */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer array ownership: B must NOT free the shared arrays when destroyed below */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ...and Bnew becomes the sole owner, freeing them in MatDestroy(&Bnew) */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; the assembly compacts the off-diagonal column space.
     MAT_NO_OFF_PROC_ENTRIES avoids the (unneeded) parallel communication during assembly. */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts mat[isrow, iscol] when isrow has the
   same processor distribution as mat (each rank's requested rows are locally owned).

   Steps (MAT_INITIAL_MATRIX):
     (1) detect the all-columns special case collectively;
     (2) intersect the (sorted, possibly duplicated) iscol_local with this rank's
         diagonal range [cstart,cend) and off-diagonal column map garray, producing
         iscol_sub (global columns kept locally) and iscmap (their positions in the submatrix);
     (3) build the sequential local submatrix Msub;
     (4) preallocate and create the parallel result;
     (5) insert Msub's rows into the parallel matrix via MatSetValues_MPIAIJ().
   Msub, iscol_sub and iscmap are composed on *newmat so MAT_REUSE_MATRIX can skip (1)-(4).

   NOTE(review): step (2)'s merge relies on iscol_local being sorted (checked by the caller)
   and on garray being sorted ascending — confirm garray invariant against MatAssembly code.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the work objects saved on the first call; all three must be present */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* keep every column: iscol_sub is iscol_local itself and the cmap is the identity */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0; /* number of requested columns present on this rank */
      k     = 0; /* merge cursor into garray (both sequences ascend) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; first (Ncols % size) ranks get one extra */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum gives this rank's column range [rstart, rend) in the new matrix */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* one allocation holds both dlens and olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column numbers to the new matrix's global columns */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - general fallback for mat[isrow, iscol]:
   builds the full local (sequential) submatrix first, then scatters its rows into a
   new parallel matrix. "Nonscalable" because iscol holds ALL requested global columns
   on every rank. The local submatrix Mreuse is composed on *newmat so a
   MAT_REUSE_MATRIX call can refill it without re-extracting the structure.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* collective: the all-columns fast path must be agreed on by every rank */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; first (n % size) ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum gives this rank's column range [rstart, rend) in the new matrix */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation holds dlens and olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* walk the CSR arrays row by row; cwork/vwork point at this row's slice */
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3851 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3852 rather than the slower MatSetValues(). 3853 */ 3854 M->was_assembled = PETSC_TRUE; 3855 M->assembled = PETSC_FALSE; 3856 } 3857 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3858 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3859 ii = aij->i; 3860 jj = aij->j; 3861 3862 /* trigger copy to CPU if needed */ 3863 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3864 for (i = 0; i < m; i++) { 3865 row = rstart + i; 3866 nz = ii[i + 1] - ii[i]; 3867 cwork = jj; 3868 jj += nz; 3869 vwork = aa; 3870 aa += nz; 3871 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3872 } 3873 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3874 3875 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3876 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3877 *newmat = M; 3878 3879 /* save submatrix used in processor for next request */ 3880 if (call == MAT_INITIAL_MATRIX) { 3881 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3882 PetscCall(MatDestroy(&Mreuse)); 3883 } 3884 PetscFunctionReturn(0); 3885 } 3886 3887 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3888 { 3889 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3890 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3891 const PetscInt *JJ; 3892 PetscBool nooffprocentries; 3893 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3894 3895 PetscFunctionBegin; 3896 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3897 3898 PetscCall(PetscLayoutSetUp(B->rmap)); 3899 PetscCall(PetscLayoutSetUp(B->cmap)); 3900 m = B->rmap->n; 3901 cstart = B->cmap->rstart; 3902 cend = B->cmap->rend; 3903 rstart = B->rmap->rstart; 3904 3905 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3906 3907 if (PetscDefined(USE_DEBUG)) { 3908 for (i = 
0; i < m; i++) { 3909 nnz = Ii[i + 1] - Ii[i]; 3910 JJ = J + Ii[i]; 3911 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3912 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3913 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3914 } 3915 } 3916 3917 for (i = 0; i < m; i++) { 3918 nnz = Ii[i + 1] - Ii[i]; 3919 JJ = J + Ii[i]; 3920 nnz_max = PetscMax(nnz_max, nnz); 3921 d = 0; 3922 for (j = 0; j < nnz; j++) { 3923 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3924 } 3925 d_nnz[i] = d; 3926 o_nnz[i] = nnz - d; 3927 } 3928 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3929 PetscCall(PetscFree2(d_nnz, o_nnz)); 3930 3931 for (i = 0; i < m; i++) { 3932 ii = i + rstart; 3933 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod is a no-op when the matrix type does not provide the method. */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}
/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
  (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
  DIAGONAL portion of the local submatrix (possibly different for each row)
  or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
  The size of this array is equal to the number of local rows, i.e 'm'.
  For matrices that will be factored, you must leave room for (and set)
  the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
  submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
  OFF-DIAGONAL portion of the local submatrix (possibly different for
  each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
  structure. The size of this array is equal to the number
  of local rows, i.e 'm'.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to.  This is an mxn matrix.  In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation; no-op for types without the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
4171 4172 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4173 4174 The format which is used for the sparse matrix input, is equivalent to a 4175 row-major ordering.. i.e for the following matrix, the input data expected is 4176 as shown 4177 4178 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4179 4180 $ 1 0 0 4181 $ 2 0 3 P0 4182 $ ------- 4183 $ 4 5 6 P1 4184 $ 4185 $ Process0 [P0]: rows_owned=[0,1] 4186 $ i = {0,1,3} [size = nrow+1 = 2+1] 4187 $ j = {0,0,2} [size = 3] 4188 $ v = {1,2,3} [size = 3] 4189 $ 4190 $ Process1 [P1]: rows_owned=[2] 4191 $ i = {0,3} [size = nrow+1 = 1+1] 4192 $ j = {0,1,2} [size = 3] 4193 $ v = {4,5,6} [size = 3] 4194 4195 .seealso: `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4196 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4197 @*/ 4198 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4199 { 4200 PetscFunctionBegin; 4201 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4202 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4203 PetscCall(MatCreate(comm, mat)); 4204 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4205 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4206 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4207 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4208 PetscFunctionReturn(0); 4209 } 4210 4211 /*@ 4212 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4213 CSR format for the local rows. 
   Only the numerical values are updated; the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`

   Deprecated: Use `MatUpdateMPIAIJWithArray()`

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  /* NOTE(review): M, N and J are accepted for interface symmetry with
     MatCreateMPIAIJWithArrays() but are not referenced in this body. */
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;  /* row offsets of the local diagonal block */
  PetscInt       *ld  = Aij->ld;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* Each local row of v is stored contiguously in global column order; the copies
     below split it as: ld[i] off-diagonal entries (columns left of the diagonal
     block), then md diagonal-block entries, then the remaining off-diagonal
     entries (columns right of the diagonal block). */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i]; /* total entries in row i of the input CSR */
    Iii = Ii[i];
    ldi = ld[i]; /* presumably #off-diagonal entries preceding the diagonal block — consistent with the copy pattern */
    md  = Adi[i + 1] - Adi[i]; /* entries in row i of the diagonal block */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* suppress off-process communication during the (structure-preserving) assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values

   Collective

   Input Parameters:
+  mat - the matrix
-  v - matrix values, stored by row

   Level: intermediate

   Note:
   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4298 { 4299 PetscInt nnz, i, m; 4300 PetscBool nooffprocentries; 4301 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4302 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4303 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4304 PetscScalar *ad, *ao; 4305 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4306 PetscInt ldi, Iii, md; 4307 PetscInt *ld = Aij->ld; 4308 4309 PetscFunctionBegin; 4310 m = mat->rmap->n; 4311 4312 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4313 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4314 Iii = 0; 4315 for (i = 0; i < m; i++) { 4316 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4317 ldi = ld[i]; 4318 md = Adi[i + 1] - Adi[i]; 4319 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4320 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4321 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4322 ad += md; 4323 ao += nnz - md; 4324 Iii += nnz; 4325 } 4326 nooffprocentries = mat->nooffprocentries; 4327 mat->nooffprocentries = PETSC_TRUE; 4328 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4329 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4330 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4331 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4332 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4333 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4334 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4335 mat->nooffprocentries = nooffprocentries; 4336 PetscFunctionReturn(0); 4337 } 4338 4339 /*@C 4340 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4341 (the default parallel PETSc format). For good matrix assembly performance 4342 the user should preallocate the matrix storage by setting the parameters 4343 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4344 performance can be increased by more than a factor of 50. 
4345 4346 Collective 4347 4348 Input Parameters: 4349 + comm - MPI communicator 4350 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4351 This value should be the same as the local size used in creating the 4352 y vector for the matrix-vector product y = Ax. 4353 . n - This value should be the same as the local size used in creating the 4354 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4355 calculated if N is given) For square matrices n is almost always m. 4356 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4357 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4358 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4359 (same value is used for all local rows) 4360 . d_nnz - array containing the number of nonzeros in the various rows of the 4361 DIAGONAL portion of the local submatrix (possibly different for each row) 4362 or NULL, if d_nz is used to specify the nonzero structure. 4363 The size of this array is equal to the number of local rows, i.e 'm'. 4364 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4365 submatrix (same value is used for all local rows). 4366 - o_nnz - array containing the number of nonzeros in the various rows of the 4367 OFF-DIAGONAL portion of the local submatrix (possibly different for 4368 each row) or NULL, if o_nz is used to specify the nonzero 4369 structure. The size of this array is equal to the number 4370 of local rows, i.e 'm'. 4371 4372 Output Parameter: 4373 . A - the matrix 4374 4375 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4376 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4377 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4378 4379 Notes: 4380 If the *_nnz parameter is given then the *_nz parameter is ignored 4381 4382 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4383 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4384 storage requirements for this matrix. 4385 4386 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4387 processor than it must be used on all processors that share the object for 4388 that argument. 4389 4390 The user MUST specify either the local or global matrix dimensions 4391 (possibly both). 4392 4393 The parallel matrix is partitioned across processors such that the 4394 first m0 rows belong to process 0, the next m1 rows belong to 4395 process 1, the next m2 rows belong to process 2 etc.. where 4396 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4397 values corresponding to [m x N] submatrix. 4398 4399 The columns are logically partitioned with the n0 columns belonging 4400 to 0th partition, the next n1 columns belonging to the next 4401 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4402 4403 The DIAGONAL portion of the local submatrix on any given processor 4404 is the submatrix corresponding to the rows and columns m,n 4405 corresponding to the given processor. i.e diagonal matrix on 4406 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4407 etc. The remaining portion of the local submatrix [m x (N-n)] 4408 constitute the OFF-DIAGONAL portion. The example below better 4409 illustrates this concept. 4410 4411 For a square global matrix we define each processor's diagonal portion 4412 to be its local rows and the corresponding columns (a square submatrix); 4413 each processor's off-diagonal portion encompasses the remainder of the 4414 local matrix (a rectangular submatrix). 
4415 4416 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4417 4418 When calling this routine with a single process communicator, a matrix of 4419 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4420 type of communicator, use the construction mechanism 4421 .vb 4422 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4423 .ve 4424 4425 $ MatCreate(...,&A); 4426 $ MatSetType(A,MATMPIAIJ); 4427 $ MatSetSizes(A, m,n,M,N); 4428 $ MatMPIAIJSetPreallocation(A,...); 4429 4430 By default, this format uses inodes (identical nodes) when possible. 4431 We search for consecutive rows with the same nonzero structure, thereby 4432 reusing matrix information to achieve increased efficiency. 4433 4434 Options Database Keys: 4435 + -mat_no_inode - Do not use inodes 4436 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4437 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4438 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4439 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4440 4441 Example usage: 4442 4443 Consider the following 8x8 matrix with 34 non-zero values, that is 4444 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4445 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4446 as follows 4447 4448 .vb 4449 1 2 0 | 0 3 0 | 0 4 4450 Proc0 0 5 6 | 7 0 0 | 8 0 4451 9 0 10 | 11 0 0 | 12 0 4452 ------------------------------------- 4453 13 0 14 | 15 16 17 | 0 0 4454 Proc1 0 18 0 | 19 20 21 | 0 0 4455 0 0 0 | 22 23 0 | 24 0 4456 ------------------------------------- 4457 Proc2 25 26 27 | 0 0 28 | 29 0 4458 30 0 0 | 31 32 33 | 0 34 4459 .ve 4460 4461 This can be represented as a collection of submatrices as 4462 4463 .vb 4464 A B C 4465 D E F 4466 G H I 4467 .ve 4468 4469 Where the submatrices A,B,C are owned by proc0, D,E,F are 4470 owned by proc1, G,H,I are owned by proc2. 4471 4472 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4473 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4474 The 'M','N' parameters are 8,8, and have the same values on all procs. 4475 4476 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4477 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4478 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4479 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4480 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4481 matrix, ans [DF] as another SeqAIJ matrix. 4482 4483 When d_nz, o_nz parameters are specified, d_nz storage elements are 4484 allocated for every row of the local diagonal submatrix, and o_nz 4485 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4486 One way to choose d_nz and o_nz is to use the max nonzerors per local 4487 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4488 In this case, the values of d_nz,o_nz are 4489 .vb 4490 proc0 : dnz = 2, o_nz = 2 4491 proc1 : dnz = 3, o_nz = 2 4492 proc2 : dnz = 1, o_nz = 4 4493 .ve 4494 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4495 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4496 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 4497 34 values. 4498 4499 When d_nnz, o_nnz parameters are specified, the storage is specified 4500 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4501 In the above case the values for d_nnz,o_nnz are 4502 .vb 4503 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4504 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4505 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4506 .ve 4507 Here the space allocated is sum of all the above values i.e 34, and 4508 hence pre-allocation is perfect. 4509 4510 Level: intermediate 4511 4512 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4513 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4514 @*/ 4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4516 { 4517 PetscMPIInt size; 4518 4519 PetscFunctionBegin; 4520 PetscCall(MatCreate(comm, A)); 4521 PetscCall(MatSetSizes(*A, m, n, M, N)); 4522 PetscCallMPI(MPI_Comm_size(comm, &size)); 4523 if (size > 1) { 4524 PetscCall(MatSetType(*A, MATMPIAIJ)); 4525 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4526 } else { 4527 PetscCall(MatSetType(*A, MATSEQAIJ)); 4528 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4529 } 4530 PetscFunctionReturn(0); 4531 } 4532 4533 /*@C 4534 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4535 4536 Not collective 4537 4538 Input Parameter: 4539 . A - The `MATMPIAIJ` matrix 4540 4541 Output Parameters: 4542 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4543 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4544 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4545 4546 Note: 4547 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4548 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4549 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4550 local column numbers to global column numbers in the original matrix. 4551 4552 Level: intermediate 4553 4554 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4555 @*/ 4556 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4557 { 4558 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4559 PetscBool flg; 4560 4561 PetscFunctionBegin; 4562 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4563 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4564 if (Ad) *Ad = a->A; 4565 if (Ao) *Ao = a->B; 4566 if (colmap) *colmap = a->garray; 4567 PetscFunctionReturn(0); 4568 } 4569 4570 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4571 { 4572 PetscInt m, N, i, rstart, nnz, Ii; 4573 PetscInt *indx; 4574 PetscScalar *values; 4575 MatType rootType; 4576 4577 PetscFunctionBegin; 4578 PetscCall(MatGetSize(inmat, &m, &N)); 4579 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4580 PetscInt *dnz, *onz, sum, bs, cbs; 4581 4582 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4583 /* Check sum(n) = N */ 4584 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4585 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != 
global columns %" PetscInt_FMT, sum, N); 4586 4587 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4588 rstart -= m; 4589 4590 MatPreallocateBegin(comm, m, n, dnz, onz); 4591 for (i = 0; i < m; i++) { 4592 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4593 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4594 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4595 } 4596 4597 PetscCall(MatCreate(comm, outmat)); 4598 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4599 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4600 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4601 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4602 PetscCall(MatSetType(*outmat, rootType)); 4603 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4604 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4605 MatPreallocateEnd(dnz, onz); 4606 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4607 } 4608 4609 /* numeric phase */ 4610 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4611 for (i = 0; i < m; i++) { 4612 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4613 Ii = i + rstart; 4614 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4615 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4616 } 4617 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4618 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4619 PetscFunctionReturn(0); 4620 } 4621 4622 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4623 { 4624 PetscMPIInt rank; 4625 PetscInt m, N, i, rstart, nnz; 4626 size_t len; 4627 const PetscInt *indx; 4628 PetscViewer out; 4629 char *name; 4630 Mat B; 4631 const PetscScalar *values; 4632 4633 PetscFunctionBegin; 4634 PetscCall(MatGetLocalSize(A, &m, NULL)); 4635 PetscCall(MatGetSize(A, NULL, &N)); 4636 /* Should this be the type of the diagonal block of A? 
*/ 4637 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4638 PetscCall(MatSetSizes(B, m, N, m, N)); 4639 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4640 PetscCall(MatSetType(B, MATSEQAIJ)); 4641 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4642 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4643 for (i = 0; i < m; i++) { 4644 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4645 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4646 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4647 } 4648 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4649 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4650 4651 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4652 PetscCall(PetscStrlen(outfile, &len)); 4653 PetscCall(PetscMalloc1(len + 6, &name)); 4654 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4655 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4656 PetscCall(PetscFree(name)); 4657 PetscCall(MatView(B, out)); 4658 PetscCall(PetscViewerDestroy(&out)); 4659 PetscCall(MatDestroy(&B)); 4660 PetscFunctionReturn(0); 4661 } 4662 4663 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4664 { 4665 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4666 4667 PetscFunctionBegin; 4668 if (!merge) PetscFunctionReturn(0); 4669 PetscCall(PetscFree(merge->id_r)); 4670 PetscCall(PetscFree(merge->len_s)); 4671 PetscCall(PetscFree(merge->len_r)); 4672 PetscCall(PetscFree(merge->bi)); 4673 PetscCall(PetscFree(merge->bj)); 4674 PetscCall(PetscFree(merge->buf_ri[0])); 4675 PetscCall(PetscFree(merge->buf_ri)); 4676 PetscCall(PetscFree(merge->buf_rj[0])); 4677 PetscCall(PetscFree(merge->buf_rj)); 4678 PetscCall(PetscFree(merge->coi)); 4679 PetscCall(PetscFree(merge->coj)); 4680 PetscCall(PetscFree(merge->owners_co)); 4681 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4682 PetscCall(PetscFree(merge)); 4683 PetscFunctionReturn(0); 4684 } 4685 4686 
#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of merging per-rank SeqAIJ matrices into one MPIAIJ matrix:
   sends each rank's values for non-owned rows to their owners, then each owner
   accumulates its local values plus all received contributions into mpimat.
   Requires the symbolic data (Mat_Merge_SeqsToMPI) composed on mpimat by
   MatCreateMPIAIJSumSeqAIJSymbolic(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the symbolic merge data attached by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* values for all of proc's rows are contiguous in aa starting at ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge sorted column lists: advance j over bj_i, consuming matching aj entries */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4843 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4844 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4845 PetscCall(PetscMalloc1(size, &len_si)); 4846 PetscCall(PetscMalloc1(size, &merge->len_s)); 4847 4848 m = merge->rowmap->n; 4849 owners = merge->rowmap->range; 4850 4851 /* determine the number of messages to send, their lengths */ 4852 /*---------------------------------------------------------*/ 4853 len_s = merge->len_s; 4854 4855 len = 0; /* length of buf_si[] */ 4856 merge->nsend = 0; 4857 for (proc = 0; proc < size; proc++) { 4858 len_si[proc] = 0; 4859 if (proc == rank) { 4860 len_s[proc] = 0; 4861 } else { 4862 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4863 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4864 } 4865 if (len_s[proc]) { 4866 merge->nsend++; 4867 nrows = 0; 4868 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4869 if (ai[i + 1] > ai[i]) nrows++; 4870 } 4871 len_si[proc] = 2 * (nrows + 1); 4872 len += len_si[proc]; 4873 } 4874 } 4875 4876 /* determine the number and length of messages to receive for ij-structure */ 4877 /*-------------------------------------------------------------------------*/ 4878 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4879 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4880 4881 /* post the Irecv of j-structure */ 4882 /*-------------------------------*/ 4883 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4884 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4885 4886 /* post the Isend of j-structure */ 4887 /*--------------------------------*/ 4888 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4889 4890 for (proc = 0, k = 0; proc < size; proc++) { 4891 if (!len_s[proc]) continue; 4892 i = owners[proc]; 4893 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4894 k++; 4895 } 4896 4897 /* receives and sends of j-structure are complete */ 4898 /*------------------------------------------------*/ 4899 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4900 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4901 4902 /* send and recv i-structure */ 4903 /*---------------------------*/ 4904 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4905 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4906 4907 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4908 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4909 for (proc = 0, k = 0; proc < size; proc++) { 4910 if (!len_s[proc]) continue; 4911 /* form outgoing message for i-structure: 4912 buf_si[0]: nrows to be sent 4913 [1:nrows]: row index (global) 4914 [nrows+1:2*nrows+1]: i-structure index 4915 */ 4916 /*-------------------------------------------*/ 4917 nrows = len_si[proc] / 2 - 1; 4918 buf_si_i = buf_si + nrows + 1; 4919 buf_si[0] = nrows; 4920 buf_si_i[0] = 0; 4921 nrows = 0; 4922 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4923 anzi = ai[i + 1] - ai[i]; 4924 if (anzi) { 4925 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4926 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4927 nrows++; 4928 } 4929 } 4930 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4931 k++; 4932 buf_si += len_si[proc]; 4933 } 4934 4935 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4936 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4937 4938 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4939 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); 4940 4941 PetscCall(PetscFree(len_si)); 4942 PetscCall(PetscFree(len_ri)); 4943 PetscCall(PetscFree(rj_waits)); 4944 PetscCall(PetscFree2(si_waits, sj_waits)); 4945 PetscCall(PetscFree(ri_waits)); 4946 PetscCall(PetscFree(buf_s)); 4947 PetscCall(PetscFree(status)); 4948 4949 /* compute a local seq matrix in each processor */ 4950 /*----------------------------------------------*/ 4951 /* allocate bi array and free space for accumulating nonzero column info */ 4952 PetscCall(PetscMalloc1(m + 1, &bi)); 4953 bi[0] = 0; 4954 4955 /* create and initialize a linked list */ 4956 nlnk = N + 1; 4957 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4958 4959 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4960 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4961 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4962 4963 current_space = free_space; 4964 4965 /* determine symbolic info for each local row */ 4966 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4967 4968 for (k = 0; k < merge->nrecv; k++) { 4969 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4970 nrows = *buf_ri_k[k]; 4971 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4972 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4973 } 4974 4975 MatPreallocateBegin(comm, m, n, dnz, onz); 4976 len = 0; 4977 for (i = 0; i < m; i++) { 4978 bnzi = 0; 4979 /* add local non-zero cols of this proc's seqmat into lnk */ 4980 arow = owners[rank] + i; 4981 anzi = ai[arow + 1] - ai[arow]; 4982 aj = a->j + ai[arow]; 4983 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4984 bnzi += nlnk; 4985 /* add received col data into lnk */ 4986 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4987 if (i == *nextrow[k]) { /* i-th row */ 4988 anzi = *(nextai[k] + 1) - *nextai[k]; 4989 aj = buf_rj[k] + 
*nextai[k]; 4990 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4991 bnzi += nlnk; 4992 nextrow[k]++; 4993 nextai[k]++; 4994 } 4995 } 4996 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4997 4998 /* if free space is not available, make more free space */ 4999 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5000 /* copy data into free space, then initialize lnk */ 5001 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5002 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5003 5004 current_space->array += bnzi; 5005 current_space->local_used += bnzi; 5006 current_space->local_remaining -= bnzi; 5007 5008 bi[i + 1] = bi[i] + bnzi; 5009 } 5010 5011 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5012 5013 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5014 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5015 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5016 5017 /* create symbolic parallel matrix B_mpi */ 5018 /*---------------------------------------*/ 5019 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5020 PetscCall(MatCreate(comm, &B_mpi)); 5021 if (n == PETSC_DECIDE) { 5022 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5023 } else { 5024 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5025 } 5026 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5027 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5028 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5029 MatPreallocateEnd(dnz, onz); 5030 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5031 5032 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5033 B_mpi->assembled = PETSC_FALSE; 5034 merge->bi = bi; 5035 merge->bj = bj; 5036 merge->buf_ri = buf_ri; 5037 merge->buf_rj = buf_rj; 5038 merge->coi = NULL; 5039 merge->coj = NULL; 5040 merge->owners_co = 
NULL; 5041 5042 PetscCall(PetscCommDestroy(&comm)); 5043 5044 /* attach the supporting struct to B_mpi for reuse */ 5045 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5046 PetscCall(PetscContainerSetPointer(container, merge)); 5047 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5048 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5049 PetscCall(PetscContainerDestroy(&container)); 5050 *mpimat = B_mpi; 5051 5052 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5053 PetscFunctionReturn(0); 5054 } 5055 5056 /*@C 5057 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5058 matrices from each processor 5059 5060 Collective 5061 5062 Input Parameters: 5063 + comm - the communicators the parallel matrix will live on 5064 . seqmat - the input sequential matrices 5065 . m - number of local rows (or `PETSC_DECIDE`) 5066 . n - number of local columns (or `PETSC_DECIDE`) 5067 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5068 5069 Output Parameter: 5070 . mpimat - the parallel matrix generated 5071 5072 Level: advanced 5073 5074 Note: 5075 The dimensions of the sequential matrix in each processor MUST be the same. 5076 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5077 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5078 @*/ 5079 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5080 { 5081 PetscMPIInt size; 5082 5083 PetscFunctionBegin; 5084 PetscCallMPI(MPI_Comm_size(comm, &size)); 5085 if (size == 1) { 5086 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5087 if (scall == MAT_INITIAL_MATRIX) { 5088 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5089 } else { 5090 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5091 } 5092 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5093 PetscFunctionReturn(0); 5094 } 5095 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5096 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5097 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5098 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5099 PetscFunctionReturn(0); 5100 } 5101 5102 /*@ 5103 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5104 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5105 with `MatGetSize()` 5106 5107 Not Collective 5108 5109 Input Parameters: 5110 + A - the matrix 5111 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5112 5113 Output Parameter: 5114 . A_loc - the local sequential matrix generated 5115 5116 Level: developer 5117 5118 Notes: 5119 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5120 5121 Destroy the matrix with `MatDestroy()` 5122 5123 .seealso: `MatMPIAIJGetLocalMat()` 5124 @*/ 5125 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5126 { 5127 PetscBool mpi; 5128 5129 PetscFunctionBegin; 5130 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5131 if (mpi) { 5132 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5133 } else { 5134 *A_loc = A; 5135 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5136 } 5137 PetscFunctionReturn(0); 5138 } 5139 5140 /*@ 5141 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5142 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5143 with `MatGetSize()` 5144 5145 Not Collective 5146 5147 Input Parameters: 5148 + A - the matrix 5149 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5150 5151 Output Parameter: 5152 . A_loc - the local sequential matrix generated 5153 5154 Level: developer 5155 5156 Notes: 5157 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5158 5159 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5160 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5161 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5162 modify the values of the returned A_loc. 

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray maps off-diagonal local columns to global columns */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Uniprocessor: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  /* aa/ba walk the numerical values of the diagonal (A) and off-diagonal (B) blocks;
     aav/bav keep the original base pointers for the matching Restore calls below */
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row i of the result has all diagonal-block plus all off-diagonal-block entries of row i */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: entries whose global column precedes this rank's column range */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local columns shifted to global by cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining entries, global column after the local range */
      for (j = jo; j < ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure already exists: only refill the values, walking in the same interleaved order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into
a sequential matrix with 5273 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5274 5275 Not Collective 5276 5277 Input Parameters: 5278 + A - the matrix 5279 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5280 5281 Output Parameters: 5282 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5283 - A_loc - the local sequential matrix generated 5284 5285 Level: developer 5286 5287 Note: 5288 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5289 5290 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5291 @*/ 5292 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5293 { 5294 Mat Ao, Ad; 5295 const PetscInt *cmap; 5296 PetscMPIInt size; 5297 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5298 5299 PetscFunctionBegin; 5300 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5301 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5302 if (size == 1) { 5303 if (scall == MAT_INITIAL_MATRIX) { 5304 PetscCall(PetscObjectReference((PetscObject)Ad)); 5305 *A_loc = Ad; 5306 } else if (scall == MAT_REUSE_MATRIX) { 5307 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5308 } 5309 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5310 PetscFunctionReturn(0); 5311 } 5312 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5313 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5314 if (f) { 5315 PetscCall((*f)(A, scall, glob, A_loc)); 5316 } else { 5317 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5318 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5319 Mat_SeqAIJ 
*c; 5320 PetscInt *ai = a->i, *aj = a->j; 5321 PetscInt *bi = b->i, *bj = b->j; 5322 PetscInt *ci, *cj; 5323 const PetscScalar *aa, *ba; 5324 PetscScalar *ca; 5325 PetscInt i, j, am, dn, on; 5326 5327 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5328 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5329 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5330 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5331 if (scall == MAT_INITIAL_MATRIX) { 5332 PetscInt k; 5333 PetscCall(PetscMalloc1(1 + am, &ci)); 5334 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5335 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5336 ci[0] = 0; 5337 for (i = 0, k = 0; i < am; i++) { 5338 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5339 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5340 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5341 /* diagonal portion of A */ 5342 for (j = 0; j < ncols_d; j++, k++) { 5343 cj[k] = *aj++; 5344 ca[k] = *aa++; 5345 } 5346 /* off-diagonal portion of A */ 5347 for (j = 0; j < ncols_o; j++, k++) { 5348 cj[k] = dn + *bj++; 5349 ca[k] = *ba++; 5350 } 5351 } 5352 /* put together the new matrix */ 5353 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5354 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5355 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5356 c = (Mat_SeqAIJ *)(*A_loc)->data; 5357 c->free_a = PETSC_TRUE; 5358 c->free_ij = PETSC_TRUE; 5359 c->nonew = 0; 5360 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5361 } else if (scall == MAT_REUSE_MATRIX) { 5362 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5363 for (i = 0; i < am; i++) { 5364 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5365 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5366 /* diagonal portion of A */ 5367 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5368 /* off-diagonal portion of A */ 5369 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5370 } 5371 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5372 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5373 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5374 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5375 if (glob) { 5376 PetscInt cst, *gidx; 5377 5378 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5379 PetscCall(PetscMalloc1(dn + on, &gidx)); 5380 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5381 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5382 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5383 } 5384 } 5385 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5386 PetscFunctionReturn(0); 5387 } 5388 5389 /*@C 5390 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5391 5392 Not Collective 5393 5394 Input Parameters: 5395 + A - the matrix 5396 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5397 - row, col - index sets of rows and columns to extract (or NULL) 5398 5399 Output Parameter: 5400 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: this process's owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: all locally owned columns plus the nonzero off-diagonal
       columns (garray), merged in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices. A whole row is extracted once a row is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (temporarily mutates pd->j; undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local indices after the broadcast of the global values */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof collapses blocks of columns to one key (MAIJ support) */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A

  Collective on A

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows of B to extract = nonzero columns of local A, in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(0);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective on Mat

  Input Parameters:
+ A,B - the matrices in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5800 5801 Level: developer 5802 5803 */ 5804 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5805 { 5806 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5807 Mat_SeqAIJ *b_oth; 5808 VecScatter ctx; 5809 MPI_Comm comm; 5810 const PetscMPIInt *rprocs, *sprocs; 5811 const PetscInt *srow, *rstarts, *sstarts; 5812 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5813 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5814 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5815 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5816 PetscMPIInt size, tag, rank, nreqs; 5817 5818 PetscFunctionBegin; 5819 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5820 PetscCallMPI(MPI_Comm_size(comm, &size)); 5821 5822 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5823 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5824 } 5825 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5826 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5827 5828 if (size == 1) { 5829 startsj_s = NULL; 5830 bufa_ptr = NULL; 5831 *B_oth = NULL; 5832 PetscFunctionReturn(0); 5833 } 5834 5835 ctx = a->Mvctx; 5836 tag = ((PetscObject)ctx)->tag; 5837 5838 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5839 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5840 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5841 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5842 PetscCall(PetscMalloc1(nreqs, &reqs)); 5843 rwaits = reqs; 5844 swaits = reqs + nrecvs; 5845 5846 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5847 if (scall == MAT_INITIAL_MATRIX) { 5848 /* i-array */ 5849 /*---------*/ 5850 /* post receives */ 5851 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5852 for (i = 0; i < nrecvs; i++) { 5853 rowlen = rvalues + rstarts[i] * rbs; 5854 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5855 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5856 } 5857 5858 /* pack the outgoing message */ 5859 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5860 5861 sstartsj[0] = 0; 5862 rstartsj[0] = 0; 5863 len = 0; /* total length of j or a array to be sent */ 5864 if (nsends) { 5865 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5866 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5867 } 5868 for (i = 0; i < nsends; i++) { 5869 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5870 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5871 for (j = 0; j < nrows; j++) { 5872 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5873 for (l = 0; l < sbs; l++) { 5874 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5875 5876 rowlen[j * sbs + l] = ncols; 5877 5878 len += ncols; 5879 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5880 } 5881 k++; 5882 } 5883 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5884 5885 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5886 } 5887 /* recvs and sends of i-array are completed */ 5888 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5889 
PetscCall(PetscFree(svalues)); 5890 5891 /* allocate buffers for sending j and a arrays */ 5892 PetscCall(PetscMalloc1(len + 1, &bufj)); 5893 PetscCall(PetscMalloc1(len + 1, &bufa)); 5894 5895 /* create i-array of B_oth */ 5896 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5897 5898 b_othi[0] = 0; 5899 len = 0; /* total length of j or a array to be received */ 5900 k = 0; 5901 for (i = 0; i < nrecvs; i++) { 5902 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5903 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5904 for (j = 0; j < nrows; j++) { 5905 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5906 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5907 k++; 5908 } 5909 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5910 } 5911 PetscCall(PetscFree(rvalues)); 5912 5913 /* allocate space for j and a arrays of B_oth */ 5914 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5915 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5916 5917 /* j-array */ 5918 /*---------*/ 5919 /* post receives of j-array */ 5920 for (i = 0; i < nrecvs; i++) { 5921 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5922 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5923 } 5924 5925 /* pack the outgoing message j-array */ 5926 if (nsends) k = sstarts[0]; 5927 for (i = 0; i < nsends; i++) { 5928 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5929 bufJ = bufj + sstartsj[i]; 5930 for (j = 0; j < nrows; j++) { 5931 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5932 for (ll = 0; ll < sbs; ll++) { 5933 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5934 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5935 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5936 } 5937 } 5938 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5939 } 
5940 5941 /* recvs and sends of j-array are completed */ 5942 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5943 } else if (scall == MAT_REUSE_MATRIX) { 5944 sstartsj = *startsj_s; 5945 rstartsj = *startsj_r; 5946 bufa = *bufa_ptr; 5947 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5948 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5949 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5950 5951 /* a-array */ 5952 /*---------*/ 5953 /* post receives of a-array */ 5954 for (i = 0; i < nrecvs; i++) { 5955 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5956 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5957 } 5958 5959 /* pack the outgoing message a-array */ 5960 if (nsends) k = sstarts[0]; 5961 for (i = 0; i < nsends; i++) { 5962 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5963 bufA = bufa + sstartsj[i]; 5964 for (j = 0; j < nrows; j++) { 5965 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5966 for (ll = 0; ll < sbs; ll++) { 5967 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5968 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5969 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5970 } 5971 } 5972 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5973 } 5974 /* recvs and sends of a-array are completed */ 5975 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5976 PetscCall(PetscFree(reqs)); 5977 5978 if (scall == MAT_INITIAL_MATRIX) { 5979 /* put together the new matrix */ 5980 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5981 5982 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5983 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5984 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5985 b_oth->free_a = PETSC_TRUE; 5986 b_oth->free_ij = PETSC_TRUE; 5987 b_oth->nonew = 0; 5988 5989 PetscCall(PetscFree(bufj)); 5990 if (!startsj_s || !bufa_ptr) { 5991 PetscCall(PetscFree2(sstartsj, rstartsj)); 5992 PetscCall(PetscFree(bufa_ptr)); 5993 } else { 5994 *startsj_s = sstartsj; 5995 *startsj_r = rstartsj; 5996 *bufa_ptr = bufa; 5997 } 5998 } else if (scall == MAT_REUSE_MATRIX) { 5999 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6000 } 6001 6002 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6003 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6004 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6005 PetscFunctionReturn(0); 6006 } 6007 6008 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6009 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6010 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6011 #if defined(PETSC_HAVE_MKL_SPARSE) 6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6013 #endif 6014 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6016 #if defined(PETSC_HAVE_ELEMENTAL) 6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6018 #endif 6019 #if defined(PETSC_HAVE_SCALAPACK) 6020 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6021 #endif 6022 #if defined(PETSC_HAVE_HYPRE) 6023 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6024 #endif 6025 #if defined(PETSC_HAVE_CUDA) 6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat 
*);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        [             ]       [             ]         [                 ]
      m [      A      ] *  n  [      B      ]   =   m [        C        ]
        [             ]       [             ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form the transposes, multiply them in the (AIJ,Dense) order that PETSc supports,
     then transpose the product back into the caller-provided C */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Declare C as the destination of Ct's transpose so MAT_REUSE_MATRIX is legal below */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase: size C, keep it dense (or adopt A's type) and install the numeric kernel */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/*
 ----------------------------------------------------------------*/
/* Validate layouts for the A*B product and register the symbolic routines */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  /* A's local columns must align with B's local rows for A*B to be defined */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

/* Dispatch on the product type; only MATPRODUCT_AB is supported for (MPIDense, MPIAIJ) */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(0);
}

/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge: both ranges are sorted, so advance the side with the
       smaller column; equal columns collapse into one merged nonzero */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: t is the running count of merged nonzeros */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6206 6207 Atot: number of entries belonging to the diagonal block 6208 Annz: number of unique nonzeros belonging to the diagonal block. 6209 6210 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6211 6212 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6213 */ 6214 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6215 { 6216 PetscInt cstart, cend, rstart, rend, row, col; 6217 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6218 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6219 PetscCount k, m, p, q, r, s, mid; 6220 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6221 6222 PetscFunctionBegin; 6223 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6224 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6225 m = rend - rstart; 6226 6227 for (k = 0; k < n; k++) { 6228 if (i[k] >= 0) break; 6229 } /* Skip negative rows */ 6230 6231 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6232 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6233 */ 6234 while (k < n) { 6235 row = i[k]; 6236 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6237 for (s = k; s < n; s++) 6238 if (i[s] != row) break; 6239 for (p = k; p < s; p++) { 6240 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6241 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6242 } 6243 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6244 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6245 rowBegin[row - rstart] = k; 6246 rowMid[row - rstart] = mid; 6247 rowEnd[row - rstart] = s; 6248 6249 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6250 Atot += mid - k; 6251 Btot += s - mid; 6252 6253 /* Count unique nonzeros of this diag/offdiag row */ 6254 for (p = k; p < mid;) { 6255 col = j[p]; 6256 do { 6257 j[p] += PETSC_MAX_INT; 6258 p++; 6259 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6260 Annz++; 6261 } 6262 6263 for (p = mid; p < s;) { 6264 col = j[p]; 6265 do { 6266 p++; 6267 } while (p < s && j[p] == col); 6268 Bnnz++; 6269 } 6270 k = s; 6271 } 6272 6273 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6274 PetscCall(PetscMalloc1(Atot, &Aperm)); 6275 PetscCall(PetscMalloc1(Btot, &Bperm)); 6276 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6277 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6278 6279 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6280 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6281 for (r = 0; r < m; r++) { 6282 k = rowBegin[r]; 6283 mid = rowMid[r]; 6284 s = rowEnd[r]; 6285 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6286 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6287 Atot += mid - k; 6288 Btot += s - mid; 6289 6290 
/* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6291 for (p = k; p < mid;) { 6292 col = j[p]; 6293 q = p; 6294 do { 6295 p++; 6296 } while (p < mid && j[p] == col); 6297 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6298 Annz++; 6299 } 6300 6301 for (p = mid; p < s;) { 6302 col = j[p]; 6303 q = p; 6304 do { 6305 p++; 6306 } while (p < s && j[p] == col); 6307 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6308 Bnnz++; 6309 } 6310 } 6311 /* Output */ 6312 *Aperm_ = Aperm; 6313 *Annz_ = Annz; 6314 *Atot_ = Atot; 6315 *Ajmap_ = Ajmap; 6316 *Bperm_ = Bperm; 6317 *Bnnz_ = Bnnz; 6318 *Btot_ = Btot; 6319 *Bjmap_ = Bjmap; 6320 PetscFunctionReturn(0); 6321 } 6322 6323 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6324 6325 Input Parameters: 6326 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6327 nnz: number of unique nonzeros in the merged matrix 6328 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6329 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6330 6331 Output Parameter: (memory is allocated by the caller) 6332 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6333 6334 Example: 6335 nnz1 = 4 6336 nnz = 6 6337 imap = [1,3,4,5] 6338 jmap = [0,3,5,6,7] 6339 then, 6340 jmap_new = [0,0,3,3,5,6,7] 6341 */ 6342 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6343 { 6344 PetscCount k, p; 6345 6346 PetscFunctionBegin; 6347 jmap_new[0] = 0; 6348 p = nnz; /* p loops over jmap_new[] backwards */ 6349 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6350 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6351 } 6352 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6353 PetscFunctionReturn(0); 6354 } 6355 6356 PetscErrorCode 
MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6357 { 6358 MPI_Comm comm; 6359 PetscMPIInt rank, size; 6360 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6361 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6362 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6363 6364 PetscFunctionBegin; 6365 PetscCall(PetscFree(mpiaij->garray)); 6366 PetscCall(VecDestroy(&mpiaij->lvec)); 6367 #if defined(PETSC_USE_CTABLE) 6368 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6369 #else 6370 PetscCall(PetscFree(mpiaij->colmap)); 6371 #endif 6372 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6373 mat->assembled = PETSC_FALSE; 6374 mat->was_assembled = PETSC_FALSE; 6375 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6376 6377 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6378 PetscCallMPI(MPI_Comm_size(comm, &size)); 6379 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6380 PetscCall(PetscLayoutSetUp(mat->rmap)); 6381 PetscCall(PetscLayoutSetUp(mat->cmap)); 6382 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6383 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6384 PetscCall(MatGetLocalSize(mat, &m, &n)); 6385 PetscCall(MatGetSize(mat, &M, &N)); 6386 6387 /* ---------------------------------------------------------------------------*/ 6388 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6389 /* entries come first, then local rows, then remote rows. 
*/ 6390 /* ---------------------------------------------------------------------------*/ 6391 PetscCount n1 = coo_n, *perm1; 6392 PetscInt *i1 = coo_i, *j1 = coo_j; 6393 6394 PetscCall(PetscMalloc1(n1, &perm1)); 6395 for (k = 0; k < n1; k++) perm1[k] = k; 6396 6397 /* Manipulate indices so that entries with negative row or col indices will have smallest 6398 row indices, local entries will have greater but negative row indices, and remote entries 6399 will have positive row indices. 6400 */ 6401 for (k = 0; k < n1; k++) { 6402 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6403 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6404 else { 6405 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6406 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6407 } 6408 } 6409 6410 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6411 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6412 for (k = 0; k < n1; k++) { 6413 if (i1[k] > PETSC_MIN_INT) break; 6414 } /* Advance k to the first entry we need to take care of */ 6415 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6416 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6417 6418 /* ---------------------------------------------------------------------------*/ 6419 /* Split local rows into diag/offdiag portions */ 6420 /* ---------------------------------------------------------------------------*/ 6421 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6422 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6423 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6424 
6425 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6426 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6427 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6428 6429 /* ---------------------------------------------------------------------------*/ 6430 /* Send remote rows to their owner */ 6431 /* ---------------------------------------------------------------------------*/ 6432 /* Find which rows should be sent to which remote ranks*/ 6433 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6434 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6435 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6436 const PetscInt *ranges; 6437 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6438 6439 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6440 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6441 for (k = rem; k < n1;) { 6442 PetscMPIInt owner; 6443 PetscInt firstRow, lastRow; 6444 6445 /* Locate a row range */ 6446 firstRow = i1[k]; /* first row of this owner */ 6447 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6448 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6449 6450 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6451 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6452 6453 /* All entries in [k,p) belong to this remote owner */ 6454 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6455 PetscMPIInt *sendto2; 6456 PetscInt *nentries2; 6457 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6458 6459 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6460 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6461 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6462 PetscCall(PetscFree2(sendto, nentries2)); 6463 sendto = sendto2; 6464 nentries = nentries2; 6465 maxNsend = maxNsend2; 6466 } 6467 sendto[nsend] = owner; 6468 nentries[nsend] = p - k; 6469 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6470 nsend++; 6471 k = p; 6472 } 6473 6474 /* Build 1st SF to know offsets on remote to send data */ 6475 PetscSF sf1; 6476 PetscInt nroots = 1, nroots2 = 0; 6477 PetscInt nleaves = nsend, nleaves2 = 0; 6478 PetscInt *offsets; 6479 PetscSFNode *iremote; 6480 6481 PetscCall(PetscSFCreate(comm, &sf1)); 6482 PetscCall(PetscMalloc1(nsend, &iremote)); 6483 PetscCall(PetscMalloc1(nsend, &offsets)); 6484 for (k = 0; k < nsend; k++) { 6485 iremote[k].rank = sendto[k]; 6486 iremote[k].index = 0; 6487 nleaves2 += nentries[k]; 6488 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6489 } 6490 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6491 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6492 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6493 PetscCall(PetscSFDestroy(&sf1)); 6494 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6495 6496 /* Build 2nd SF to send remote COOs to their owner */ 6497 PetscSF sf2; 6498 nroots = nroots2; 6499 nleaves = nleaves2; 6500 PetscCall(PetscSFCreate(comm, &sf2)); 6501 
  PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  for (k = 0; k < nsend; k++) {
    /* A negative offset means the PetscInt root space overflowed during the fetch-and-op above */
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank  = sendto[k];
      iremote[p].index = offsets[k] + q;
    }
  }
  /* sf2 takes ownership of iremote (PETSC_OWN_POINTER) */
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
  PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));

  /* Send the remote COOs to their owner: two rounds, row indices then column indices */
  PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));

  PetscCall(PetscFree(offsets));
  PetscCall(PetscFree2(sendto, nentries));

  /* ---------------------------------------------------------------*/
  /* Sort received COOs by row along with the permutation array     */
  /* ---------------------------------------------------------------*/
  for (k = 0; k < n2; k++) perm2[k] = k;
  PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));

  /* ---------------------------------------------------------------*/
  /* Split received COOs into diag/offdiag portions */
  /* ---------------------------------------------------------------*/
  PetscCount *rowBegin2, *rowMid2, *rowEnd2;
  PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
  PetscCount  Annz2, Bnnz2, Atot2, Btot2;

  PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
  PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));

  /* --------------------------------------------------------------------------*/
  /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
  /* --------------------------------------------------------------------------*/
  PetscInt *Ai, *Bi;
  PetscInt *Aj, *Bj;

  PetscCall(PetscMalloc1(m + 1, &Ai));
  PetscCall(PetscMalloc1(m + 1, &Bi));
  PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
  PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));

  PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
  PetscCall(PetscMalloc1(Annz1, &Aimap1));
  PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
  PetscCall(PetscMalloc1(Annz2, &Aimap2));
  PetscCall(PetscMalloc1(Bnnz2, &Bimap2));

  /* First call merges the diagonal-block ranges, second the off-diagonal-block ranges */
  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));

  /* --------------------------------------------------------------------------*/
  /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
  /* expect nonzeros in A/B most likely have local contributing entries        */
  /* --------------------------------------------------------------------------*/
  PetscInt    Annz = Ai[m];
  PetscInt    Bnnz = Bi[m];
  PetscCount *Ajmap1_new, *Bjmap1_new;

  PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));

  PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
  PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));

  /* The original (pre-expansion) maps and the scratch arrays are no longer needed */
  PetscCall(PetscFree(Aimap1));
  PetscCall(PetscFree(Ajmap1));
  PetscCall(PetscFree(Bimap1));
  PetscCall(PetscFree(Bjmap1));
  PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
  PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
  PetscCall(PetscFree(perm1));
  PetscCall(PetscFree3(i2, j2, perm2));

  Ajmap1 = Ajmap1_new;
  Bjmap1 = Bjmap1_new;

  /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
  if (Annz < Annz1 + Annz2) {
    PetscInt *Aj_new;
    PetscCall(PetscMalloc1(Annz, &Aj_new));
    PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
    PetscCall(PetscFree(Aj));
    Aj = Aj_new;
  }

  if (Bnnz < Bnnz1 + Bnnz2) {
    PetscInt *Bj_new;
    PetscCall(PetscMalloc1(Bnnz, &Bj_new));
    PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
    PetscCall(PetscFree(Bj));
    Bj = Bj_new;
  }

  /* --------------------------------------------------------------------------------*/
  /* Create new submatrices for on-process and off-process coupling                   */
  /* --------------------------------------------------------------------------------*/
  PetscScalar *Aa, *Ba;
  MatType      rtype;
  Mat_SeqAIJ  *a, *b;
  PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
  PetscCall(PetscCalloc1(Bnnz, &Ba));
  /* make Aj[] local, i.e., based off the start column of the diagonal portion */
  if (cstart) {
    for (k = 0; k < Annz; k++) Aj[k] -= cstart;
  }
  PetscCall(MatDestroy(&mpiaij->A));
  PetscCall(MatDestroy(&mpiaij->B));
  PetscCall(MatGetRootType_Private(mat, &rtype));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
  PetscCall(MatSetUpMultiply_MPIAIJ(mat));

  a = (Mat_SeqAIJ *)mpiaij->A->data;
  b = (Mat_SeqAIJ *)mpiaij->B->data;
  a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
  a->free_a = b->free_a = PETSC_TRUE;
  a->free_ij = b->free_ij = PETSC_TRUE;

  /* conversion must happen AFTER multiply setup */
  PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
  PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
  PetscCall(VecDestroy(&mpiaij->lvec));
  PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));

  /* Stash everything MatSetValuesCOO_MPIAIJ() will need to route and sum values */
  mpiaij->coo_n   = coo_n;
  mpiaij->coo_sf  = sf2;
  mpiaij->sendlen = nleaves;
  mpiaij->recvlen = nroots;

  mpiaij->Annz = Annz;
  mpiaij->Bnnz = Bnnz;

  mpiaij->Annz2 = Annz2;
  mpiaij->Bnnz2 = Bnnz2;

  mpiaij->Atot1 = Atot1;
  mpiaij->Atot2 = Atot2;
  mpiaij->Btot1 = Btot1;
  mpiaij->Btot2 = Btot2;

  mpiaij->Ajmap1 = Ajmap1;
  mpiaij->Aperm1 = Aperm1;

  mpiaij->Bjmap1 = Bjmap1;
  mpiaij->Bperm1 = Bperm1;

  mpiaij->Aimap2 = Aimap2;
  mpiaij->Ajmap2 = Ajmap2;
  mpiaij->Aperm2 = Aperm2;

  mpiaij->Bimap2 = Bimap2;
  mpiaij->Bjmap2 = Bjmap2;
  mpiaij->Bperm2 = Bperm2;

  mpiaij->Cperm1 = Cperm1;

  /* Allocate in preallocation.
If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* Insert/add the COO values v[] (in the user's original COO order) into the matrix,
   using the maps built by MatSetPreallocationCOO_MPIAIJ(): local entries are summed
   directly into the diagonal (A) and off-diagonal (B) blocks, while remote entries
   are shipped to their owning rank through mpiaij->coo_sf and folded in afterwards. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    /* With INSERT_VALUES the old value is discarded; with ADD_VALUES it is kept */
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; always "+=" since the local pass above initialized every nonzero */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
    `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* Allocate and wire up the MPIAIJ-specific data structure and method table */
  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register named methods on the object; they are looked up by string name elsewhere
     (e.g. via PetscObjectQueryFunction()) so only compiled-in backends are attached */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
       communication if it is known that only local entries will be set.

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Sanity checks on the user-provided CSR: local row count known, row offsets 0-based */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap (not copy) the user's arrays as the diagonal (A) and off-diagonal (B) sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* NOTE(review): MAT_NO_OFF_PROC_ENTRIES is toggled on just for this assembly — presumably to skip
     off-process communication since only local data was supplied; confirm before relying on it */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Private context shared by the backend MatProduct implementations below */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Release every resource held by a MatMatMPIAIJBACKEND product context */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with memory type mtype, so free them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single backing allocation for all per-product index arrays */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific implementation if one was composed on A (e.g. a device backend) */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    /* Host fallback: gather through idx[], or bulk-copy the first n values when idx is NULL */
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym only skips the refresh on the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s",
MatProductTypes[mmdata->mp[i]->product->type]); 6998 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6999 } 7000 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7001 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7002 7003 if (mmdata->mptmp[i]) continue; 7004 if (noff) { 7005 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7006 7007 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7008 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7009 n_o += noff; 7010 n_d += nown; 7011 } else { 7012 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7013 7014 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7015 n_d += mm->nz; 7016 } 7017 } 7018 if (mmdata->hasoffproc) { /* offprocess insertion */ 7019 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7020 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7021 } 7022 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7023 PetscFunctionReturn(0); 7024 } 7025 7026 /* Support for Pt * A, A * P, or Pt * A * P */ 7027 #define MAX_NUMBER_INTERMEDIATE 4 7028 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7029 { 7030 Mat_Product *product = C->product; 7031 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7032 Mat_MPIAIJ *a, *p; 7033 MatMatMPIAIJBACKEND *mmdata; 7034 ISLocalToGlobalMapping P_oth_l2g = NULL; 7035 IS glob = NULL; 7036 const char *prefix; 7037 char pprefix[256]; 7038 const PetscInt *globidx, *P_oth_idx; 7039 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7040 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7041 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7042 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7043 /* a base offset; type-2: sparse with a local to global map table */ 7044 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7045 7046 MatProductType ptype; 7047 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk; 7048 PetscMPIInt size; 7049 7050 PetscFunctionBegin; 7051 MatCheckProduct(C, 1); 7052 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7053 ptype = product->type; 7054 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7055 ptype = MATPRODUCT_AB; 7056 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7057 } 7058 switch (ptype) { 7059 case MATPRODUCT_AB: 7060 A = product->A; 7061 P = product->B; 7062 m = A->rmap->n; 7063 n = P->cmap->n; 7064 M = A->rmap->N; 7065 N = P->cmap->N; 7066 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7067 break; 7068 case MATPRODUCT_AtB: 7069 P = product->A; 7070 A = product->B; 7071 m = P->cmap->n; 7072 n = A->cmap->n; 7073 M = P->cmap->N; 7074 N = A->cmap->N; 7075 hasoffproc = PETSC_TRUE; 7076 break; 7077 case MATPRODUCT_PtAP: 7078 A = product->A; 7079 P = product->B; 7080 m = P->cmap->n; 7081 n = P->cmap->n; 7082 M = P->cmap->N; 7083 N = P->cmap->N; 7084 hasoffproc = PETSC_TRUE; 7085 break; 7086 default: 7087 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7088 } 7089 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7090 if (size == 1) hasoffproc = PETSC_FALSE; 7091 7092 /* defaults */ 7093 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7094 mp[i] = NULL; 7095 mptmp[i] = PETSC_FALSE; 7096 rmapt[i] = -1; 7097 cmapt[i] = -1; 7098 rmapa[i] = NULL; 7099 cmapa[i] = NULL; 7100 } 7101 7102 /* customization */ 7103 
PetscCall(PetscNew(&mmdata)); 7104 mmdata->reusesym = product->api_user; 7105 if (ptype == MATPRODUCT_AB) { 7106 if (product->api_user) { 7107 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7108 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7109 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7110 PetscOptionsEnd(); 7111 } else { 7112 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7113 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7114 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7115 PetscOptionsEnd(); 7116 } 7117 } else if (ptype == MATPRODUCT_PtAP) { 7118 if (product->api_user) { 7119 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7120 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7121 PetscOptionsEnd(); 7122 } else { 7123 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7124 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7125 PetscOptionsEnd(); 7126 } 7127 } 7128 a = (Mat_MPIAIJ *)A->data; 7129 p = (Mat_MPIAIJ *)P->data; 7130 PetscCall(MatSetSizes(C, m, n, M, N)); 7131 PetscCall(PetscLayoutSetUp(C->rmap)); 7132 PetscCall(PetscLayoutSetUp(C->cmap)); 7133 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7134 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7135 7136 cp = 0; 7137 switch (ptype) { 7138 case MATPRODUCT_AB: /* A * P */ 7139 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7140 7141 /* A_diag * P_local (merged or not) */ 7142 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7143 /* P is product->B */ 7144 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7145 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7146 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7147 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7148 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7149 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7150 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7151 mp[cp]->product->api_user = product->api_user; 7152 PetscCall(MatProductSetFromOptions(mp[cp])); 7153 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7154 PetscCall(ISGetIndices(glob, &globidx)); 7155 rmapt[cp] = 1; 7156 cmapt[cp] = 2; 7157 cmapa[cp] = globidx; 7158 mptmp[cp] = PETSC_FALSE; 7159 cp++; 7160 } else { /* A_diag * P_diag and A_diag * P_off */ 7161 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7162 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7163 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7164 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7165 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7166 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7167 mp[cp]->product->api_user = product->api_user; 7168 PetscCall(MatProductSetFromOptions(mp[cp])); 7169 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7170 rmapt[cp] = 1; 7171 cmapt[cp] = 1; 7172 mptmp[cp] = PETSC_FALSE; 7173 cp++; 7174 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7175 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7176 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7177 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7178 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7179 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7180 mp[cp]->product->api_user = product->api_user; 7181 PetscCall(MatProductSetFromOptions(mp[cp])); 7182 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7183 rmapt[cp] = 1; 7184 cmapt[cp] = 2; 7185 cmapa[cp] = p->garray; 7186 mptmp[cp] = PETSC_FALSE; 7187 cp++; 7188 } 7189 7190 /* A_off * P_other */ 7191 if (mmdata->P_oth) { 7192 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7193 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7194 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7195 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7196 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7197 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7198 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7199 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7200 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7201 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7202 mp[cp]->product->api_user = product->api_user; 7203 PetscCall(MatProductSetFromOptions(mp[cp])); 7204 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7205 rmapt[cp] = 1; 7206 cmapt[cp] = 2; 7207 cmapa[cp] = P_oth_idx; 7208 mptmp[cp] = PETSC_FALSE; 7209 cp++; 7210 } 7211 break; 7212 7213 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7214 /* A is product->B */ 7215 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7216 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7217 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7218 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7219 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7220 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7221 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7222 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7223 mp[cp]->product->api_user = product->api_user; 7224 PetscCall(MatProductSetFromOptions(mp[cp])); 7225 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7226 PetscCall(ISGetIndices(glob, &globidx)); 7227 rmapt[cp] = 2; 7228 rmapa[cp] = globidx; 7229 cmapt[cp] = 2; 7230 cmapa[cp] = globidx; 7231 mptmp[cp] = PETSC_FALSE; 7232 cp++; 7233 } else { 7234 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7235 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7236 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7237 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7238 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7239 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7240 mp[cp]->product->api_user = product->api_user; 7241 PetscCall(MatProductSetFromOptions(mp[cp])); 7242 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7243 PetscCall(ISGetIndices(glob, &globidx)); 7244 rmapt[cp] = 1; 7245 cmapt[cp] = 2; 7246 cmapa[cp] = globidx; 7247 mptmp[cp] = PETSC_FALSE; 7248 cp++; 7249 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7250 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7251 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7252 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7253 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7254 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7255 mp[cp]->product->api_user = product->api_user; 7256 PetscCall(MatProductSetFromOptions(mp[cp])); 7257 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7258 rmapt[cp] = 2; 7259 rmapa[cp] = p->garray; 
7260 cmapt[cp] = 2; 7261 cmapa[cp] = globidx; 7262 mptmp[cp] = PETSC_FALSE; 7263 cp++; 7264 } 7265 break; 7266 case MATPRODUCT_PtAP: 7267 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7268 /* P is product->B */ 7269 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7270 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7271 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7272 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7273 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7274 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7275 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7276 mp[cp]->product->api_user = product->api_user; 7277 PetscCall(MatProductSetFromOptions(mp[cp])); 7278 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7279 PetscCall(ISGetIndices(glob, &globidx)); 7280 rmapt[cp] = 2; 7281 rmapa[cp] = globidx; 7282 cmapt[cp] = 2; 7283 cmapa[cp] = globidx; 7284 mptmp[cp] = PETSC_FALSE; 7285 cp++; 7286 if (mmdata->P_oth) { 7287 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7288 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7289 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7290 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7291 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7292 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7293 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7294 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7295 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7296 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7297 mp[cp]->product->api_user = product->api_user; 7298 PetscCall(MatProductSetFromOptions(mp[cp])); 7299 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7300 
mptmp[cp] = PETSC_TRUE; 7301 cp++; 7302 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7303 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7304 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7305 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7306 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7307 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7308 mp[cp]->product->api_user = product->api_user; 7309 PetscCall(MatProductSetFromOptions(mp[cp])); 7310 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7311 rmapt[cp] = 2; 7312 rmapa[cp] = globidx; 7313 cmapt[cp] = 2; 7314 cmapa[cp] = P_oth_idx; 7315 mptmp[cp] = PETSC_FALSE; 7316 cp++; 7317 } 7318 break; 7319 default: 7320 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7321 } 7322 /* sanity check */ 7323 if (size > 1) 7324 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7325 7326 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7327 for (i = 0; i < cp; i++) { 7328 mmdata->mp[i] = mp[i]; 7329 mmdata->mptmp[i] = mptmp[i]; 7330 } 7331 mmdata->cp = cp; 7332 C->product->data = mmdata; 7333 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7334 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7335 7336 /* memory type */ 7337 mmdata->mtype = PETSC_MEMTYPE_HOST; 7338 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7339 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7340 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7341 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7342 7343 /* prepare coo coordinates for values insertion */ 7344 7345 /* count total nonzeros of those intermediate seqaij Mats 7346 ncoo_d: # of nonzeros of matrices that 
do not have offproc entries 7347 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7348 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7349 */ 7350 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7351 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7352 if (mptmp[cp]) continue; 7353 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7354 const PetscInt *rmap = rmapa[cp]; 7355 const PetscInt mr = mp[cp]->rmap->n; 7356 const PetscInt rs = C->rmap->rstart; 7357 const PetscInt re = C->rmap->rend; 7358 const PetscInt *ii = mm->i; 7359 for (i = 0; i < mr; i++) { 7360 const PetscInt gr = rmap[i]; 7361 const PetscInt nz = ii[i + 1] - ii[i]; 7362 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7363 else ncoo_oown += nz; /* this row is local */ 7364 } 7365 } else ncoo_d += mm->nz; 7366 } 7367 7368 /* 7369 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7370 7371 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7372 7373 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7374 7375 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7376 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7377 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7378 7379 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7380 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7381 */ 7382 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7383 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7384 7385 /* gather (i,j) of nonzeros inserted by remote procs */ 7386 if (hasoffproc) { 7387 PetscSF msf; 7388 PetscInt ncoo2, *coo_i2, *coo_j2; 7389 7390 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7391 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7392 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7393 7394 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7395 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7396 PetscInt *idxoff = mmdata->off[cp]; 7397 PetscInt *idxown = mmdata->own[cp]; 7398 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7399 const PetscInt *rmap = rmapa[cp]; 7400 const PetscInt *cmap = cmapa[cp]; 7401 const PetscInt *ii = mm->i; 7402 PetscInt *coi = coo_i + ncoo_o; 7403 PetscInt *coj = coo_j + ncoo_o; 7404 const PetscInt mr = mp[cp]->rmap->n; 7405 const PetscInt rs = C->rmap->rstart; 7406 const PetscInt re = C->rmap->rend; 7407 const PetscInt cs = C->cmap->rstart; 7408 for (i = 0; i < mr; i++) { 7409 const PetscInt *jj = mm->j + ii[i]; 7410 const PetscInt gr = rmap[i]; 7411 const PetscInt nz = ii[i + 1] - ii[i]; 7412 if (gr < rs || gr >= re) { /* this is an offproc row */ 7413 for (j = ii[i]; j < ii[i + 1]; j++) { 7414 *coi++ = gr; 7415 *idxoff++ = j; 7416 } 7417 if (!cmapt[cp]) { /* already global */ 7418 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7419 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7420 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7421 } else { /* offdiag */ 7422 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7423 } 7424 ncoo_o += nz; 7425 } else { /* this is a local row */ 7426 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7427 } 7428 } 7429 } 7430 mmdata->off[cp + 1] = idxoff; 7431 mmdata->own[cp + 1] = idxown; 7432 } 7433 7434 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7435 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7436 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7437 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7438 ncoo = ncoo_d + ncoo_oown + ncoo2; 7439 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7440 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7441 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7442 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7443 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7444 PetscCall(PetscFree2(coo_i, coo_j)); 7445 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7446 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7447 coo_i = coo_i2; 7448 coo_j = coo_j2; 7449 } else { /* no offproc values insertion */ 7450 ncoo = ncoo_d; 7451 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7452 7453 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7454 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7455 PetscCall(PetscSFSetUp(mmdata->sf)); 7456 } 7457 mmdata->hasoffproc = hasoffproc; 7458 7459 /* gather (i,j) of nonzeros inserted locally */ 7460 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7461 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7462 PetscInt *coi = coo_i + ncoo_d; 7463 PetscInt *coj = coo_j + ncoo_d; 7464 const PetscInt *jj = mm->j; 7465 const PetscInt *ii = mm->i; 7466 const PetscInt *cmap = cmapa[cp]; 7467 const PetscInt *rmap = rmapa[cp]; 7468 const PetscInt mr = mp[cp]->rmap->n; 7469 const PetscInt rs = C->rmap->rstart; 7470 const 
PetscInt re = C->rmap->rend; 7471 const PetscInt cs = C->cmap->rstart; 7472 7473 if (mptmp[cp]) continue; 7474 if (rmapt[cp] == 1) { /* consecutive rows */ 7475 /* fill coo_i */ 7476 for (i = 0; i < mr; i++) { 7477 const PetscInt gr = i + rs; 7478 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7479 } 7480 /* fill coo_j */ 7481 if (!cmapt[cp]) { /* type-0, already global */ 7482 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7483 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7484 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7485 } else { /* type-2, local to global for sparse columns */ 7486 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7487 } 7488 ncoo_d += mm->nz; 7489 } else if (rmapt[cp] == 2) { /* sparse rows */ 7490 for (i = 0; i < mr; i++) { 7491 const PetscInt *jj = mm->j + ii[i]; 7492 const PetscInt gr = rmap[i]; 7493 const PetscInt nz = ii[i + 1] - ii[i]; 7494 if (gr >= rs && gr < re) { /* local rows */ 7495 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7496 if (!cmapt[cp]) { /* type-0, already global */ 7497 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7498 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7499 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7500 } else { /* type-2, local to global for sparse columns */ 7501 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7502 } 7503 ncoo_d += nz; 7504 } 7505 } 7506 } 7507 } 7508 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7509 PetscCall(ISDestroy(&glob)); 7510 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7511 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7512 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7513 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7514 7515 /* preallocate with COO data */ 7516 PetscCall(MatSetPreallocationCOO(C, ncoo, 
coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}

/*
  MatProductSetFromOptions_MPIAIJBACKEND - install the backend symbolic product for AB, AtB and PtAP

  The backend symbolic routine is only installed when the two operands have the same type (checked with
  PetscObjectTypeCompare below) and, when device support is compiled in, when neither operand is bound to
  the CPU. The user can also force the CPU path with the -mat*_backend_cpu options. In every other case
  the method falls back to MatProductSetFromOptions_MPIAIJ().
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match = PETSC_FALSE; /* operand types must be proven compatible before using the backend */
  PetscBool usecpu = PETSC_FALSE; /* user escape hatch: force the CPU implementation */
#else
  PetscBool match = PETSC_TRUE; /* without device support the backend is always applicable */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* use the backend only if both operands live on the device and share the same concrete type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name (and help title) depends on whether the user called the legacy API
       (MatMatMult/MatTransposeMatMult/MatPtAP) or the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* user asked for CPU: drop the backend */
  }
#endif
  if (match) {
    /* all three supported product types share the same backend symbolic phase */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j; /* cnt = -1 so that an empty row yields *n = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt = 0;
    cc[cnt] = idx[0] / bs;
    /* NOTE(review): the dedup below assumes idx[] is sorted ascending (true for AIJ rows) — confirm for other bases */
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

7620 ncollapsed - the number of block indices 7621 collapsed - the block indices (must be large enough to contain the indices) 7622 */ 7623 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7624 { 7625 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7626 7627 PetscFunctionBegin; 7628 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7629 for (i = start + 1; i < start + bs; i++) { 7630 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7631 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7632 cprevtmp = cprev; 7633 cprev = merged; 7634 merged = cprevtmp; 7635 } 7636 *ncollapsed = nprev; 7637 if (collapsed) *collapsed = cprev; 7638 PetscFunctionReturn(0); 7639 } 7640 7641 /* 7642 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7643 */ 7644 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7645 { 7646 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7647 Mat tGmat; 7648 MPI_Comm comm; 7649 const PetscScalar *vals; 7650 const PetscInt *idx; 7651 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7652 MatScalar *AA; // this is checked in graph 7653 PetscBool isseqaij; 7654 Mat a, b, c; 7655 MatType jtype; 7656 7657 PetscFunctionBegin; 7658 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7659 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7660 PetscCall(MatGetType(Gmat, &jtype)); 7661 PetscCall(MatCreate(comm, &tGmat)); 7662 PetscCall(MatSetType(tGmat, jtype)); 7663 7664 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7665 Also, if the matrix is symmetric, can we skip this 7666 operation? It can be very expensive on large matrices. 
*/ 7667 7668 // global sizes 7669 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7670 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7671 nloc = Iend - Istart; 7672 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7673 if (isseqaij) { 7674 a = Gmat; 7675 b = NULL; 7676 } else { 7677 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7678 a = d->A; 7679 b = d->B; 7680 garray = d->garray; 7681 } 7682 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7683 for (PetscInt row = 0; row < nloc; row++) { 7684 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7685 d_nnz[row] = ncols; 7686 if (ncols > maxcols) maxcols = ncols; 7687 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7688 } 7689 if (b) { 7690 for (PetscInt row = 0; row < nloc; row++) { 7691 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7692 o_nnz[row] = ncols; 7693 if (ncols > maxcols) maxcols = ncols; 7694 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7695 } 7696 } 7697 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7698 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7699 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7700 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7701 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7702 PetscCall(PetscFree2(d_nnz, o_nnz)); 7703 // 7704 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7705 nnz0 = nnz1 = 0; 7706 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7707 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7708 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7709 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7710 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7711 if (PetscRealPart(sv) > vfilter) { 7712 nnz1++; 7713 PetscInt cid = idx[jj] + Istart; //diag 7714 if (c != a) cid = garray[idx[jj]]; 7715 AA[ncol_row] = vals[jj]; 7716 AJ[ncol_row] = cid; 7717 ncol_row++; 7718 } 7719 } 7720 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7721 
PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7722 } 7723 } 7724 PetscCall(PetscFree2(AA, AJ)); 7725 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 7726 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7727 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7728 7729 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7730 7731 *filteredG = tGmat; 7732 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7733 PetscFunctionReturn(0); 7734 } 7735 7736 /* 7737 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7738 7739 Input Parameter: 7740 . Amat - matrix 7741 - symmetrize - make the result symmetric 7742 + scale - scale with diagonal 7743 7744 Output Parameter: 7745 . 
a_Gmat - output scalar graph >= 0 7746 7747 */ 7748 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7749 { 7750 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7751 MPI_Comm comm; 7752 Mat Gmat; 7753 PetscBool ismpiaij, isseqaij; 7754 Mat a, b, c; 7755 MatType jtype; 7756 7757 PetscFunctionBegin; 7758 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7759 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7760 PetscCall(MatGetSize(Amat, &MM, &NN)); 7761 PetscCall(MatGetBlockSize(Amat, &bs)); 7762 nloc = (Iend - Istart) / bs; 7763 7764 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7765 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7766 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7767 7768 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7769 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7770 implementation */ 7771 if (bs > 1) { 7772 PetscCall(MatGetType(Amat, &jtype)); 7773 PetscCall(MatCreate(comm, &Gmat)); 7774 PetscCall(MatSetType(Gmat, jtype)); 7775 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7776 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7777 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7778 PetscInt *d_nnz, *o_nnz; 7779 MatScalar *aa, val, AA[4096]; 7780 PetscInt *aj, *ai, AJ[4096], nc; 7781 if (isseqaij) { 7782 a = Amat; 7783 b = NULL; 7784 } else { 7785 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7786 a = d->A; 7787 b = d->B; 7788 } 7789 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7790 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7791 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7792 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz, nmax = 0; 7793 const PetscInt *cols; 7794 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7795 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7796 nnz[brow / bs] = jj / bs; 7797 if (jj % bs) ok = 0; 7798 if (cols) j0 = cols[0]; 7799 else j0 = -1; 7800 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7801 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7802 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7803 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7804 if (jj % bs) ok = 0; 7805 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7806 if (nnz[brow / bs] != jj / bs) ok = 0; 7807 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7808 } 7809 if (!ok) { 7810 PetscCall(PetscFree2(d_nnz, o_nnz)); 7811 goto old_bs; 7812 } 7813 } 7814 PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax); 7815 } 7816 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7817 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7818 PetscCall(PetscFree2(d_nnz, o_nnz)); 7819 // diag 7820 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7821 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7822 ai = aseq->i; 7823 n = ai[brow + 1] - ai[brow]; 7824 aj = aseq->j + ai[brow]; 7825 for (int k = 0; k < n; k += bs) { // block columns 7826 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7827 val = 0; 7828 for (int ii = 0; ii < bs; ii++) { // rows in block 7829 aa = aseq->a + ai[brow + ii] + k; 7830 for (int jj = 0; jj < bs; jj++) { // columns in block 7831 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7832 } 7833 } 7834 AA[k / bs] = val; 7835 } 7836 grow = Istart / bs + brow / bs; 7837 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7838 } 7839 // off-diag 7840 if (ismpiaij) { 7841 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7842 
const PetscScalar *vals; 7843 const PetscInt *cols, *garray = aij->garray; 7844 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7845 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7846 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7847 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7848 AA[k / bs] = 0; 7849 AJ[cidx] = garray[cols[k]] / bs; 7850 } 7851 nc = ncols / bs; 7852 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7853 for (int ii = 0; ii < bs; ii++) { // rows in block 7854 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7855 for (int k = 0; k < ncols; k += bs) { 7856 for (int jj = 0; jj < bs; jj++) { // cols in block 7857 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7858 } 7859 } 7860 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7861 } 7862 grow = Istart / bs + brow / bs; 7863 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7864 } 7865 } 7866 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7867 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7868 } else { 7869 const PetscScalar *vals; 7870 const PetscInt *idx; 7871 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7872 old_bs: 7873 /* 7874 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7875 */ 7876 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7877 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7878 if (isseqaij) { 7879 PetscInt max_d_nnz; 7880 /* 7881 Determine exact preallocation count for (sequential) scalar matrix 7882 */ 7883 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7884 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7885 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7886 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7887 PetscCall(PetscFree3(w0, w1, w2)); 7888 } else if (ismpiaij) { 7889 Mat Daij, Oaij; 7890 const PetscInt *garray; 7891 PetscInt max_d_nnz; 7892 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7893 /* 7894 Determine exact preallocation count for diagonal block portion of scalar matrix 7895 */ 7896 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7897 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7898 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7899 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7900 PetscCall(PetscFree3(w0, w1, w2)); 7901 /* 7902 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7903 */ 7904 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7905 o_nnz[jj] = 0; 7906 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7907 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7908 o_nnz[jj] += ncols; 7909 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7910 } 7911 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7912 } 7913 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7914 /* get scalar copy (norms) of matrix */ 7915 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7916 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7917 PetscCall(PetscFree2(d_nnz, o_nnz)); 7918 for (Ii = Istart; Ii < Iend; Ii++) { 7919 
PetscInt dest_row = Ii / bs; 7920 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7921 for (jj = 0; jj < ncols; jj++) { 7922 PetscInt dest_col = idx[jj] / bs; 7923 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7924 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7925 } 7926 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7927 } 7928 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7929 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7930 } 7931 } else { 7932 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7933 else { 7934 Gmat = Amat; 7935 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7936 } 7937 if (isseqaij) { 7938 a = Gmat; 7939 b = NULL; 7940 } else { 7941 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7942 a = d->A; 7943 b = d->B; 7944 } 7945 if (filter >= 0 || scale) { 7946 /* take absolute value of each entry */ 7947 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7948 MatInfo info; 7949 PetscScalar *avals; 7950 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7951 PetscCall(MatSeqAIJGetArray(c, &avals)); 7952 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7953 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7954 } 7955 } 7956 } 7957 if (symmetrize) { 7958 PetscBool isset, issym; 7959 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7960 if (!isset || !issym) { 7961 Mat matTrans; 7962 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7963 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7964 PetscCall(MatDestroy(&matTrans)); 7965 } 7966 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7967 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7968 if (scale) { 7969 /* scale c for all diagonal values = 1 or -1 */ 7970 Vec diag; 7971 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7972 PetscCall(MatGetDiagonal(Gmat, diag)); 7973 PetscCall(VecReciprocal(diag)); 7974 PetscCall(VecSqrtAbs(diag)); 7975 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7976 PetscCall(VecDestroy(&diag)); 7977 } 7978 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7979 7980 if (filter >= 0) { 7981 Mat Fmat = NULL; /* some silly compiler needs this */ 7982 7983 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 7984 PetscCall(MatDestroy(&Gmat)); 7985 Gmat = Fmat; 7986 } 7987 *a_Gmat = Gmat; 7988 PetscFunctionReturn(0); 7989 } 7990 7991 /* 7992 Special version for direct calls from Fortran 7993 */ 7994 #include <petsc/private/fortranimpl.h> 7995 7996 /* Change these macros so can be used in void function */ 7997 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7998 #undef PetscCall 7999 #define PetscCall(...) \ 8000 do { \ 8001 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8002 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8003 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8004 return; \ 8005 } \ 8006 } while (0) 8007 8008 #undef SETERRQ 8009 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran linker name expected by the compiler's mangling scheme */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   matsetvaluesmpiaij_ - Fortran-callable, direct (no dispatch table) version of MatSetValues()
   specialized for MATMPIAIJ matrices.

   All arguments arrive as pointers per Fortran convention; errors are reported by storing into
   *_ierr and returning, via the PetscCall()/SETERRQ() macros redefined immediately above for
   this void function (and #undef'd again at the end of this file).

   Local (on-process) rows are inserted directly into the diagonal (A) and off-diagonal (B)
   sequential blocks using the MatSetValues_SeqAIJ_A_Private()/_B_Private() macros; rows owned
   by other processes are queued in the matrix stash unless aij->donotstash is set.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; subsequent calls must not mix ADD_VALUES and INSERT_VALUES */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Binary-search state consumed by MatSetValues_SeqAIJ_A_Private (suffix 1, diagonal block)
       and MatSetValues_SeqAIJ_B_Private (suffix 2, off-diagonal block); these exact names are
       required by those macros and must not be renamed. */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored (MatSetValues convention) */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned by this process: set up the per-row search state for both blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v[] is interpreted row-major or column-major depending on MatSetOption(MAT_ROW_ORIENTED) */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Column belongs to the off-diagonal block */
            if (mat->was_assembled) {
              /* After assembly, off-diagonal columns are in compressed (local) numbering; translate via colmap */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* Column not present in the assembled pattern and new nonzeros are allowed:
                   expand B back to global column numbering so the entry can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                /* NOTE(review): rp2/ap2 are recomputed from the pre-disassembly ba before ba is
                   refreshed from b->a two lines below — presumably intentional/harmless here, but
                   verify against MatSetValues_MPIAIJ which this mirrors */
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Off-process row: queue the values in the stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ