#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
  Destroys an MPIAIJ matrix: releases the sequential diagonal (A) and off-diagonal (B)
  blocks, the communication machinery (lvec, Mvctx), the global-to-local column map,
  and then detaches every composed object/function so the base Mat header is clean.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap maps global column ids to local ids in B; its representation depends on PETSC_USE_CTABLE */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared above; the repeat is harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
  Builds the merged local matrix (diagonal plus off-diagonal part) and hands out its
  compressed row indices. The merged matrix is composed on A (which keeps a reference)
  so the matching MatRestoreRowIJ_MPIAIJ() can recover it after the MatDestroy() below.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  /* the compose holds a reference, so B survives the local MatDestroy() */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the row indices obtained by MatGetRowIJ_MPIAIJ() and drops the stashed merged local matrix */
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  /* composing NULL releases the reference taken in MatGetRowIJ_MPIAIJ() */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
  Binds (or unbinds, per flg) the matrix to the CPU: the flag is forwarded to the
  diagonal (A) and off-diagonal (B) blocks and to the work vectors used in products.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Propagates block sizes to the sequential blocks; the off-diagonal block B always
  gets column block size 1 (its columns are an arbitrary subset of global columns).
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns in keptrows an IS of the local rows that contain at least one stored nonzero
  value (in either the diagonal or off-diagonal part). If no process has a zero row,
  *keptrows is left NULL (collective short-circuit via the Allreduce below).
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of local rows that are structurally empty or whose stored values are all zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* n0rows = global count of zero rows; if none anywhere, every row is kept and we return NULL */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the m - cnt nonzero rows */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Sets (or adds to) the diagonal of Y from D. When the layouts are congruent and Y is
  assembled, the whole diagonal lives in the diagonal block so the operation can be
  delegated to it; otherwise fall back to the generic implementation.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns in zrows an IS (global numbering) of the local rows whose diagonal entry is
  missing or zero; the search is done on the diagonal block and shifted by rstart.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a per-column reduction (norms, sums, or means of real/imaginary parts) over
  all rows of A. Each rank accumulates the contributions of its diagonal part (columns
  offset by cmap->rstart) and its off-diagonal part (columns mapped through garray)
  into a length-n work array, which is then combined across ranks with an Allreduce
  (MAX for the infinity norm, SUM otherwise).
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): the paired get/restore appears intended to refresh the host copies
     of the values before the raw a_aij->a / b_aij->a accesses below — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by the global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns in is the (global) row indices that have an entry outside the block diagonal:
  the union of the diagonal block's off-block-diagonal rows and every row with an
  off-diagonal (B) entry, sorted with duplicates removed.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate the two local lists, then sort and deduplicate in place */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
  /* entries are stored shifted by +1 so that 0 (hash miss / calloc zero) means "column not present in B" */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inserts/adds one value into row `row` of the diagonal block A: binary-search the
   sorted column list rp1, update in place if found, otherwise (subject to the nonew
   and ignorezeroentries policies) reallocate if needed and shift the row to make room.
   Relies on the caller having set up rp1/ap1/nrow1/low1/high1/lastcol1 for the row. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B
   (rp2/ap2/nrow2/... bookkeeping; no row != col exception for zero entries). */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  Copies an entire locally owned row given in v laid out as
  [left-of-diagonal B part | diagonal A part | right-of-diagonal B part],
  overwriting the existing stored values of that row.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues for MPIAIJ. Locally owned rows are split column-wise between the
  diagonal block A (columns in [cstart,cend)) and the off-diagonal block B (all other
  columns, mapped to local ids through colmap once the matrix was assembled); rows
  owned by other processes are stashed for communication during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index: skip */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the per-row search state used by both macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj ? aj + ai[row] : NULL;
      ap1      = aa ? aa + ai[row] : NULL;
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj ? bj + bi[row] : NULL;
      ap2      = ba ? ba + bi[row] : NULL;
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative column index: skip */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column ids directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash for delivery during MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v ? v + i * n : NULL, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v ? v + i : NULL, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal part stores local column ids */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* off-diagonal part keeps global ids until assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  Mat          A   = aij->A; /* diagonal part of the matrix */
  Mat          B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b   = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Retrieves values from locally owned rows only; columns in the diagonal range come
  from A, all other columns are mapped through colmap into B (returning 0.0 for
  entries that are not stored). Requesting an off-process row is an error.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* a miss (or a stale colmap slot) means the entry is not stored: report 0.0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Starts assembly: initiates the scatter of stashed off-process entries. A no-op when
  stashing is disabled or the user promised no off-process entries.
*/
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Finishes assembly: drains the stash and applies the received off-process entries,
  assembles the diagonal and off-diagonal blocks, performs collective disassembly
  when any rank needed to expand B, and (on final assembly) builds the scatter
  machinery for matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  /* the off-diagonal block never uses inodes */
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row workspace from MatGetRow() is invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeroes all stored values by delegating to the diagonal and off-diagonal blocks */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const
PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  /*
    Zeros the (globally numbered) rows in `rows`, optionally placing `diag` on
    the diagonal and fixing the right-hand side b so that x satisfies the
    boundary values: b[row] = diag * x[row].
  */
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember nonzero states so we can detect a pattern change afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* square/congruent: diagonal entries live in the A block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow new nonzeros for the diagonal insertion below */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the caller's nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRowsColumns_MPIAIJ - Zeros the given rows AND the matching columns,
  optionally placing `diag` on the diagonal and adjusting b for the eliminated
  column entries: b[i] -= a_ij * x[j] for zeroed columns j.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  /* mark zeroed rows with 1 and scatter so every rank knows which ghost columns die */
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is zeroed: move its contribution to b, then drop the value */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMult_MPIAIJ - y = A*x, overlapping the ghost-value scatter with the
  diagonal-block multiply, then adding the off-diagonal contribution.
*/
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* start the ghost scatter, do the local (diagonal-block) product while it is in flight */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultDiagonalBlock_MPIAIJ - Applies only the local diagonal block A. */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultAdd_MPIAIJ - z = A*x + y, with the same scatter/compute overlap as MatMult. */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTranspose_MPIAIJ - y = A^T*x; off-diagonal partials are scattered back (reverse/add). */
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat == Amat^T (within tol).
  First a cheap collective check on the diagonal blocks; only if that passes is
  the expensive off-diagonal comparison via MatCreateSubMatrices performed.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  /* note the swapped row/column index sets for Bmat */
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatIsSymmetric_MPIAIJ - A is symmetric iff A is its own transpose. */
static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTransposeAdd_MPIAIJ - z = A^T*x + y, mirroring MatMultTranspose. */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatScale_MPIAIJ - Scales both local blocks by aa. */
static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the
  PETSc binary matrix format: header, row lengths, global column indices, and
  values. Column indices of each row are emitted in ascending global order by
  interleaving the off-diagonal (B) entries before/after the diagonal (A) run.
*/
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    /* global nz may overflow PetscInt; clamp the header field in that case */
    if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* B entries with global column < diagonal range come first */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* remaining B entries (global column above the diagonal range) */
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same interleaving order as the indices) */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for ASCII/draw/socket
  output. Info-style ASCII formats are handled in place; otherwise the whole
  matrix is gathered onto rank 0 and viewed there.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %"
                                     PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized report of local sizes, nz counts, and I-node usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
      PetscCall(PetscObjectReference((PetscObject)AA[0]));
      A  = AA[0];
      Av = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPIAIJ - Top-level viewer entry: forwards supported viewer types. */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSOR_MPIAIJ - Local (block Jacobi style) SOR sweeps: each outer iteration
  scatters the current x to ghost values, forms bb1 = bb - B*x, and applies the
  requested sweep of the sequential SOR on the diagonal block.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag,
                                  fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed whenever an updated rhs (bb - B*x) must be formed */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal lazily for the pointwise multiply fallback */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - Returns B = P_r * A * P_c for row/column permutations
  rowp/colp. Destination rows/columns are found by inverting the permutations
  with PetscSF reductions; preallocation counts are computed per destination
  rank before inserting the permuted values.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros per destination row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* move the counts to the ranks that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetGhosts_MPIAIJ - Returns the number of ghost (off-process) columns and,
  optionally, their global indices (the garray of the off-diagonal block).
*/
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetInfo_MPIAIJ - Accumulates info from blocks A and B (continues below this chunk). */
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* gather A's local statistics, then add B's */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
1631 if (flag == MAT_LOCAL) { 1632 info->nz_used = isend[0]; 1633 info->nz_allocated = isend[1]; 1634 info->nz_unneeded = isend[2]; 1635 info->memory = isend[3]; 1636 info->mallocs = isend[4]; 1637 } else if (flag == MAT_GLOBAL_MAX) { 1638 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1639 1640 info->nz_used = irecv[0]; 1641 info->nz_allocated = irecv[1]; 1642 info->nz_unneeded = irecv[2]; 1643 info->memory = irecv[3]; 1644 info->mallocs = irecv[4]; 1645 } else if (flag == MAT_GLOBAL_SUM) { 1646 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1647 1648 info->nz_used = irecv[0]; 1649 info->nz_allocated = irecv[1]; 1650 info->nz_unneeded = irecv[2]; 1651 info->memory = irecv[3]; 1652 info->mallocs = irecv[4]; 1653 } 1654 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1655 info->fill_ratio_needed = 0; 1656 info->factor_mallocs = 0; 1657 PetscFunctionReturn(PETSC_SUCCESS); 1658 } 1659 1660 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1661 { 1662 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1663 1664 PetscFunctionBegin; 1665 switch (op) { 1666 case MAT_NEW_NONZERO_LOCATIONS: 1667 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1668 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1669 case MAT_KEEP_NONZERO_PATTERN: 1670 case MAT_NEW_NONZERO_LOCATION_ERR: 1671 case MAT_USE_INODES: 1672 case MAT_IGNORE_ZERO_ENTRIES: 1673 case MAT_FORM_EXPLICIT_TRANSPOSE: 1674 MatCheckPreallocated(A, 1); 1675 PetscCall(MatSetOption(a->A, op, flg)); 1676 PetscCall(MatSetOption(a->B, op, flg)); 1677 break; 1678 case MAT_ROW_ORIENTED: 1679 MatCheckPreallocated(A, 1); 1680 a->roworiented = flg; 1681 1682 PetscCall(MatSetOption(a->A, op, flg)); 1683 PetscCall(MatSetOption(a->B, op, flg)); 1684 break; 1685 case MAT_FORCE_DIAGONAL_ENTRIES: 1686 case MAT_SORTED_FULL: 1687 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1688 break; 1689 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1690 a->donotstash = flg; 1691 break; 1692 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1693 case MAT_SPD: 1694 case MAT_SYMMETRIC: 1695 case MAT_STRUCTURALLY_SYMMETRIC: 1696 case MAT_HERMITIAN: 1697 case MAT_SYMMETRY_ETERNAL: 1698 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1699 case MAT_SPD_ETERNAL: 1700 /* if the diagonal matrix is square it inherits some of the properties above */ 1701 break; 1702 case MAT_SUBMAT_SINGLEIS: 1703 A->submat_singleis = flg; 1704 break; 1705 case MAT_STRUCTURE_ONLY: 1706 /* The option is handled directly by MatSetOption() */ 1707 break; 1708 default: 1709 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1710 } 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1715 { 1716 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1717 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1718 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1719 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1720 PetscInt *cmap, *idx_p; 1721 1722 PetscFunctionBegin; 1723 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1724 mat->getrowactive = PETSC_TRUE; 1725 1726 if (!mat->rowvalues && (idx || v)) { 1727 /* 1728 allocate enough space to hold information from the longest row. 
1729 */ 1730 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1731 PetscInt max = 1, tmp; 1732 for (i = 0; i < matin->rmap->n; i++) { 1733 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1734 if (max < tmp) max = tmp; 1735 } 1736 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1737 } 1738 1739 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1740 lrow = row - rstart; 1741 1742 pvA = &vworkA; 1743 pcA = &cworkA; 1744 pvB = &vworkB; 1745 pcB = &cworkB; 1746 if (!v) { 1747 pvA = NULL; 1748 pvB = NULL; 1749 } 1750 if (!idx) { 1751 pcA = NULL; 1752 if (!v) pcB = NULL; 1753 } 1754 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1755 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1756 nztot = nzA + nzB; 1757 1758 cmap = mat->garray; 1759 if (v || idx) { 1760 if (nztot) { 1761 /* Sort by increasing column numbers, assuming A and B already sorted */ 1762 PetscInt imark = -1; 1763 if (v) { 1764 *v = v_p = mat->rowvalues; 1765 for (i = 0; i < nzB; i++) { 1766 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1767 else break; 1768 } 1769 imark = i; 1770 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1771 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1772 } 1773 if (idx) { 1774 *idx = idx_p = mat->rowindices; 1775 if (imark > -1) { 1776 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1777 } else { 1778 for (i = 0; i < nzB; i++) { 1779 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1780 else break; 1781 } 1782 imark = i; 1783 } 1784 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1785 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1786 } 1787 } else { 1788 if (idx) *idx = NULL; 1789 if (v) *v = NULL; 1790 } 1791 } 1792 *nz = nztot; 1793 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1794 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1795 PetscFunctionReturn(PETSC_SUCCESS); 1796 } 1797 1798 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1799 { 1800 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1801 1802 PetscFunctionBegin; 1803 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1804 aij->getrowactive = PETSC_FALSE; 1805 PetscFunctionReturn(PETSC_SUCCESS); 1806 } 1807 1808 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1809 { 1810 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1811 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1812 PetscInt i, j, cstart = mat->cmap->rstart; 1813 PetscReal sum = 0.0; 1814 const MatScalar *v, *amata, *bmata; 1815 1816 PetscFunctionBegin; 1817 if (aij->size == 1) { 1818 PetscCall(MatNorm(aij->A, type, norm)); 1819 } else { 1820 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1821 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1822 if (type == NORM_FROBENIUS) { 1823 v = amata; 1824 for (i = 0; i < amat->nz; i++) { 1825 sum += PetscRealPart(PetscConj(*v) * (*v)); 1826 v++; 1827 } 1828 v = bmata; 1829 for (i = 0; i < bmat->nz; i++) { 1830 sum += PetscRealPart(PetscConj(*v) * (*v)); 1831 v++; 1832 } 1833 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1834 *norm = PetscSqrtReal(*norm); 1835 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1836 } else if (type == NORM_1) { /* max column norm */ 1837 PetscReal *tmp, *tmp2; 1838 PetscInt *jj, *garray = aij->garray; 1839 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1840 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1841 *norm = 0.0; 1842 v = amata; 1843 jj = amat->j; 1844 for (j = 0; j < amat->nz; j++) { 1845 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1846 v++; 1847 } 1848 v = bmata; 1849 jj = bmat->j; 1850 for (j = 0; j < bmat->nz; j++) { 1851 
tmp[garray[*jj++]] += PetscAbsScalar(*v); 1852 v++; 1853 } 1854 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1855 for (j = 0; j < mat->cmap->N; j++) { 1856 if (tmp2[j] > *norm) *norm = tmp2[j]; 1857 } 1858 PetscCall(PetscFree(tmp)); 1859 PetscCall(PetscFree(tmp2)); 1860 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1861 } else if (type == NORM_INFINITY) { /* max row norm */ 1862 PetscReal ntemp = 0.0; 1863 for (j = 0; j < aij->A->rmap->n; j++) { 1864 v = amata + amat->i[j]; 1865 sum = 0.0; 1866 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1867 sum += PetscAbsScalar(*v); 1868 v++; 1869 } 1870 v = bmata + bmat->i[j]; 1871 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1872 sum += PetscAbsScalar(*v); 1873 v++; 1874 } 1875 if (sum > ntemp) ntemp = sum; 1876 } 1877 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1878 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1879 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1880 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1881 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1882 } 1883 PetscFunctionReturn(PETSC_SUCCESS); 1884 } 1885 1886 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1887 { 1888 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1889 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1890 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1891 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1892 Mat B, A_diag, *B_diag; 1893 const MatScalar *pbv, *bv; 1894 1895 PetscFunctionBegin; 1896 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1897 ma = A->rmap->n; 1898 na = A->cmap->n; 1899 mb = a->B->rmap->n; 1900 nb = a->B->cmap->n; 
1901 ai = Aloc->i; 1902 aj = Aloc->j; 1903 bi = Bloc->i; 1904 bj = Bloc->j; 1905 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1906 PetscInt *d_nnz, *g_nnz, *o_nnz; 1907 PetscSFNode *oloc; 1908 PETSC_UNUSED PetscSF sf; 1909 1910 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1911 /* compute d_nnz for preallocation */ 1912 PetscCall(PetscArrayzero(d_nnz, na)); 1913 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1914 /* compute local off-diagonal contributions */ 1915 PetscCall(PetscArrayzero(g_nnz, nb)); 1916 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1917 /* map those to global */ 1918 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1919 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1920 PetscCall(PetscSFSetFromOptions(sf)); 1921 PetscCall(PetscArrayzero(o_nnz, na)); 1922 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1923 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1924 PetscCall(PetscSFDestroy(&sf)); 1925 1926 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1927 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1928 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1929 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1930 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1931 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1932 } else { 1933 B = *matout; 1934 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1935 } 1936 1937 b = (Mat_MPIAIJ *)B->data; 1938 A_diag = a->A; 1939 B_diag = &b->A; 1940 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1941 A_diag_ncol = A_diag->cmap->N; 1942 B_diag_ilen = sub_B_diag->ilen; 1943 B_diag_i = sub_B_diag->i; 1944 1945 /* Set ilen for diagonal of B */ 1946 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1947 1948 /* Transpose the diagonal part of the matrix. 
In contrast to the off-diagonal part, this can be done 1949 very quickly (=without using MatSetValues), because all writes are local. */ 1950 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1951 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1952 1953 /* copy over the B part */ 1954 PetscCall(PetscMalloc1(bi[mb], &cols)); 1955 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1956 pbv = bv; 1957 row = A->rmap->rstart; 1958 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1959 cols_tmp = cols; 1960 for (i = 0; i < mb; i++) { 1961 ncol = bi[i + 1] - bi[i]; 1962 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1963 row++; 1964 if (pbv) pbv += ncol; 1965 if (cols_tmp) cols_tmp += ncol; 1966 } 1967 PetscCall(PetscFree(cols)); 1968 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1969 1970 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1971 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1972 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1973 *matout = B; 1974 } else { 1975 PetscCall(MatHeaderMerge(A, &B)); 1976 } 1977 PetscFunctionReturn(PETSC_SUCCESS); 1978 } 1979 1980 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1981 { 1982 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1983 Mat a = aij->A, b = aij->B; 1984 PetscInt s1, s2, s3; 1985 1986 PetscFunctionBegin; 1987 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1988 if (rr) { 1989 PetscCall(VecGetLocalSize(rr, &s1)); 1990 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1991 /* Overlap communication with computation. 
*/ 1992 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1993 } 1994 if (ll) { 1995 PetscCall(VecGetLocalSize(ll, &s1)); 1996 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1997 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1998 } 1999 /* scale the diagonal block */ 2000 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2001 2002 if (rr) { 2003 /* Do a scatter end and then right scale the off-diagonal block */ 2004 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2005 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2006 } 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2011 { 2012 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2013 2014 PetscFunctionBegin; 2015 PetscCall(MatSetUnfactored(a->A)); 2016 PetscFunctionReturn(PETSC_SUCCESS); 2017 } 2018 2019 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2020 { 2021 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2022 Mat a, b, c, d; 2023 PetscBool flg; 2024 2025 PetscFunctionBegin; 2026 a = matA->A; 2027 b = matA->B; 2028 c = matB->A; 2029 d = matB->B; 2030 2031 PetscCall(MatEqual(a, c, &flg)); 2032 if (flg) PetscCall(MatEqual(b, d, &flg)); 2033 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2034 PetscFunctionReturn(PETSC_SUCCESS); 2035 } 2036 2037 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2038 { 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2040 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2041 2042 PetscFunctionBegin; 2043 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2044 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2045 /* because of the column compression in the off-processor part of the matrix a->B, 2046 the number of columns in a->B and b->B may be different, hence we cannot call 2047 the MatCopy() directly on the two parts. If need be, we can provide a more 2048 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2049 then copying the submatrices */ 2050 PetscCall(MatCopy_Basic(A, B, str)); 2051 } else { 2052 PetscCall(MatCopy(a->A, b->A, str)); 2053 PetscCall(MatCopy(a->B, b->B, str)); 2054 } 2055 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2056 PetscFunctionReturn(PETSC_SUCCESS); 2057 } 2058 2059 /* 2060 Computes the number of nonzeros per row needed for preallocation when X and Y 2061 have different nonzero structure. 2062 */ 2063 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2064 { 2065 PetscInt i, j, k, nzx, nzy; 2066 2067 PetscFunctionBegin; 2068 /* Set the number of nonzeros in the new matrix */ 2069 for (i = 0; i < m; i++) { 2070 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2071 nzx = xi[i + 1] - xi[i]; 2072 nzy = yi[i + 1] - yi[i]; 2073 nnz[i] = 0; 2074 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2075 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2076 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2077 nnz[i]++; 2078 } 2079 for (; k < nzy; k++) nnz[i]++; 2080 } 2081 PetscFunctionReturn(PETSC_SUCCESS); 2082 } 2083 2084 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2085 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2086 { 2087 PetscInt m = Y->rmap->N; 2088 Mat_SeqAIJ *x = 
(Mat_SeqAIJ *)X->data; 2089 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2090 2091 PetscFunctionBegin; 2092 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2093 PetscFunctionReturn(PETSC_SUCCESS); 2094 } 2095 2096 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2097 { 2098 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2099 2100 PetscFunctionBegin; 2101 if (str == SAME_NONZERO_PATTERN) { 2102 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2103 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2104 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2105 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2106 } else { 2107 Mat B; 2108 PetscInt *nnz_d, *nnz_o; 2109 2110 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2111 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2112 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2113 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2114 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2115 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2116 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2117 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2118 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2119 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2120 PetscCall(MatHeaderMerge(Y, &B)); 2121 PetscCall(PetscFree(nnz_d)); 2122 PetscCall(PetscFree(nnz_o)); 2123 } 2124 PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2128 2129 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2130 { 2131 PetscFunctionBegin; 2132 if (PetscDefined(USE_COMPLEX)) { 2133 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2134 2135 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2136 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2137 } 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 
static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCall(MatRealPart(a->A)); 2147 PetscCall(MatRealPart(a->B)); 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2154 2155 PetscFunctionBegin; 2156 PetscCall(MatImaginaryPart(a->A)); 2157 PetscCall(MatImaginaryPart(a->B)); 2158 PetscFunctionReturn(PETSC_SUCCESS); 2159 } 2160 2161 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2162 { 2163 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2164 PetscInt i, *idxb = NULL, m = A->rmap->n; 2165 PetscScalar *va, *vv; 2166 Vec vB, vA; 2167 const PetscScalar *vb; 2168 2169 PetscFunctionBegin; 2170 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2171 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2172 2173 PetscCall(VecGetArrayWrite(vA, &va)); 2174 if (idx) { 2175 for (i = 0; i < m; i++) { 2176 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2177 } 2178 } 2179 2180 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2181 PetscCall(PetscMalloc1(m, &idxb)); 2182 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2183 2184 PetscCall(VecGetArrayWrite(v, &vv)); 2185 PetscCall(VecGetArrayRead(vB, &vb)); 2186 for (i = 0; i < m; i++) { 2187 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2188 vv[i] = vb[i]; 2189 if (idx) idx[i] = a->garray[idxb[i]]; 2190 } else { 2191 vv[i] = va[i]; 2192 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2193 } 2194 } 2195 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2196 PetscCall(VecRestoreArrayWrite(vA, &va)); 2197 PetscCall(VecRestoreArrayRead(vB, &vb)); 2198 PetscCall(PetscFree(idxb)); 2199 PetscCall(VecDestroy(&vA)); 2200 PetscCall(VecDestroy(&vB)); 2201 PetscFunctionReturn(PETSC_SUCCESS); 2202 } 2203 2204 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat 
A, Vec v, PetscInt idx[]) 2205 { 2206 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2207 PetscInt m = A->rmap->n, n = A->cmap->n; 2208 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2209 PetscInt *cmap = mat->garray; 2210 PetscInt *diagIdx, *offdiagIdx; 2211 Vec diagV, offdiagV; 2212 PetscScalar *a, *diagA, *offdiagA; 2213 const PetscScalar *ba, *bav; 2214 PetscInt r, j, col, ncols, *bi, *bj; 2215 Mat B = mat->B; 2216 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2217 2218 PetscFunctionBegin; 2219 /* When a process holds entire A and other processes have no entry */ 2220 if (A->cmap->N == n) { 2221 PetscCall(VecGetArrayWrite(v, &diagA)); 2222 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2223 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2224 PetscCall(VecDestroy(&diagV)); 2225 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2226 PetscFunctionReturn(PETSC_SUCCESS); 2227 } else if (n == 0) { 2228 if (m) { 2229 PetscCall(VecGetArrayWrite(v, &a)); 2230 for (r = 0; r < m; r++) { 2231 a[r] = 0.0; 2232 if (idx) idx[r] = -1; 2233 } 2234 PetscCall(VecRestoreArrayWrite(v, &a)); 2235 } 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2240 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2241 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2242 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2243 2244 /* Get offdiagIdx[] for implicit 0.0 */ 2245 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2246 ba = bav; 2247 bi = b->i; 2248 bj = b->j; 2249 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2250 for (r = 0; r < m; r++) { 2251 ncols = bi[r + 1] - bi[r]; 2252 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2253 offdiagA[r] = *ba; 2254 offdiagIdx[r] = cmap[0]; 2255 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2256 offdiagA[r] = 0.0; 2257 2258 /* Find first hole in the cmap */ 2259 for (j = 0; j < ncols; j++) { 2260 col = cmap[bj[j]]; /* global column 
number = cmap[B column number] */ 2261 if (col > j && j < cstart) { 2262 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2263 break; 2264 } else if (col > j + n && j >= cstart) { 2265 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2266 break; 2267 } 2268 } 2269 if (j == ncols && ncols < A->cmap->N - n) { 2270 /* a hole is outside compressed Bcols */ 2271 if (ncols == 0) { 2272 if (cstart) { 2273 offdiagIdx[r] = 0; 2274 } else offdiagIdx[r] = cend; 2275 } else { /* ncols > 0 */ 2276 offdiagIdx[r] = cmap[ncols - 1] + 1; 2277 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2278 } 2279 } 2280 } 2281 2282 for (j = 0; j < ncols; j++) { 2283 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2284 offdiagA[r] = *ba; 2285 offdiagIdx[r] = cmap[*bj]; 2286 } 2287 ba++; 2288 bj++; 2289 } 2290 } 2291 2292 PetscCall(VecGetArrayWrite(v, &a)); 2293 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2294 for (r = 0; r < m; ++r) { 2295 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2296 a[r] = diagA[r]; 2297 if (idx) idx[r] = cstart + diagIdx[r]; 2298 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2299 a[r] = diagA[r]; 2300 if (idx) { 2301 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2302 idx[r] = cstart + diagIdx[r]; 2303 } else idx[r] = offdiagIdx[r]; 2304 } 2305 } else { 2306 a[r] = offdiagA[r]; 2307 if (idx) idx[r] = offdiagIdx[r]; 2308 } 2309 } 2310 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2311 PetscCall(VecRestoreArrayWrite(v, &a)); 2312 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2313 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2314 PetscCall(VecDestroy(&diagV)); 2315 PetscCall(VecDestroy(&offdiagV)); 2316 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2317 PetscFunctionReturn(PETSC_SUCCESS); 2318 } 2319 2320 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2321 { 2322 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2323 
PetscInt m = A->rmap->n, n = A->cmap->n; 2324 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2325 PetscInt *cmap = mat->garray; 2326 PetscInt *diagIdx, *offdiagIdx; 2327 Vec diagV, offdiagV; 2328 PetscScalar *a, *diagA, *offdiagA; 2329 const PetscScalar *ba, *bav; 2330 PetscInt r, j, col, ncols, *bi, *bj; 2331 Mat B = mat->B; 2332 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2333 2334 PetscFunctionBegin; 2335 /* When a process holds entire A and other processes have no entry */ 2336 if (A->cmap->N == n) { 2337 PetscCall(VecGetArrayWrite(v, &diagA)); 2338 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2339 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2340 PetscCall(VecDestroy(&diagV)); 2341 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2342 PetscFunctionReturn(PETSC_SUCCESS); 2343 } else if (n == 0) { 2344 if (m) { 2345 PetscCall(VecGetArrayWrite(v, &a)); 2346 for (r = 0; r < m; r++) { 2347 a[r] = PETSC_MAX_REAL; 2348 if (idx) idx[r] = -1; 2349 } 2350 PetscCall(VecRestoreArrayWrite(v, &a)); 2351 } 2352 PetscFunctionReturn(PETSC_SUCCESS); 2353 } 2354 2355 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2357 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2358 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2359 2360 /* Get offdiagIdx[] for implicit 0.0 */ 2361 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2362 ba = bav; 2363 bi = b->i; 2364 bj = b->j; 2365 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2366 for (r = 0; r < m; r++) { 2367 ncols = bi[r + 1] - bi[r]; 2368 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2369 offdiagA[r] = *ba; 2370 offdiagIdx[r] = cmap[0]; 2371 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2372 offdiagA[r] = 0.0; 2373 2374 /* Find first hole in the cmap */ 2375 for (j = 0; j < ncols; j++) { 2376 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2377 if (col > j && j < cstart) { 2378 
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols: no gap was found while scanning the stored columns */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* step over this process's diagonal-block column range */
        }
      }
    }

    /* compare the implicit-zero candidate against the explicitly stored off-diagonal entries of this row */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine per-row minima of the diagonal block (mat->A) and the off-diagonal block; on a tie the smaller global column index wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-row maximum (compared by real part) of an MPIAIJ matrix.
   v[r] receives the maximum over the whole row r; idx[r] (if requested) the global
   column of that maximum. Implicit zeros in the off-diagonal block count as 0.0,
   so their global column must be reconstructed from holes in the compressed
   column map garray (cmap). Ties prefer the smaller global column index. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat    = (Mat_MPIAIJ *)A->data;
  PetscInt           m      = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap   = mat->garray; /* global columns of the compressed B columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* B is empty here; delegate directly to the sequential diagonal block */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this process owns no columns at all: report identity of max and column -1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, start from the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* step over this process's diagonal-block column range */
        }
      }
    }

    /* compare the implicit-zero candidate against the explicitly stored off-diagonal entries of this row */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine per-row maxima of the diagonal and off-diagonal blocks; on a tie the smaller global column index wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return in *newmat a sequential matrix with the nonzero structure (no values) of the whole parallel matrix */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy; /* take ownership of the single matrix; free only the array wrapper */
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the (point-block) diagonal of the local diagonal block; propagate any factorization error flag */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix with random values; for an unassembled (but preallocated) matrix the
   off-diagonal block must not receive entries in this process's own column range */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* skip columns [cstart,cend): those belong to the diagonal block */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Toggle which MatIncreaseOverlap implementation the matrix dispatches to */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
.
nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* local nonzeros = stored entries of the diagonal block plus those of the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* no-op for matrix types that do not compose the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process MPIAIJ-specific runtime options (currently only the scalable-overlap switch) */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed increaseoverlap implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

static
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  /* Y = Y + a*I; make sure the diagonal block can hold one entry per row before shifting */
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* save; re-preallocating would otherwise reset the nonew flag */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any diagonal entry is structurally missing; *d (if requested) is the first such GLOBAL row */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert the local row returned by the SeqAIJ routine to a global row */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert variable-size diagonal blocks; all blocks lie in the local diagonal part a->A */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop explicitly stored zeros from both local blocks */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));         // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE));  // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch table for MATMPIAIJ; the position of each entry is fixed by struct _MatOps (see the numbered markers) */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};

/* Stash a copy of the current values of both local blocks (pairs with MatRetrieveValues_MPIAIJ) */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously saved by MatStoreValues_MPIAIJ */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* (Re)create and preallocate the two local SeqAIJ blocks:
   d_nz/d_nnz size the diagonal block, o_nz/o_nnz the off-diagonal block.
   Any previously built communication structures (colmap, garray, lvec, Mvctx) are discarded. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-table insertion mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* throw away any previously built global-to-local column map and scatter context */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  /* off-diagonal block: on one process it is empty (0 columns); otherwise it spans all global columns */
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* diagonal block: local rows x local columns */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset the preallocation of both local blocks while keeping their nonzero pattern capacity;
   communication structures are rebuilt at the next assembly */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIAIJ matrix (layouts, local blocks, column maps); values copied per cpvalues */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-call MatGetRow work arrays are not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the
     matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a matrix from a viewer; dispatches on viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a matrix from a PETSc binary viewer: header, per-row counts (converted to CSR row
   offsets in place), then column indices and values, finally fed to MatMPIAIJSetPreallocationCSR */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader does not understand */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; /* prefix-sum row lengths into CSR offsets */
  if (nz != PETSC_MAX_INT) {
    /* nz == PETSC_MAX_INT signals the count was not stored; otherwise validate it */
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* local iscol matches exactly this process's column ownership range */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* MIN over all ranks: gisstride is 1 only if EVERY process grabs exactly its own columns */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
PetscCall(ISSetBlockSize(iscol_local, cbs)); 3117 } 3118 3119 *isseq = iscol_local; 3120 PetscFunctionReturn(PETSC_SUCCESS); 3121 } 3122 3123 /* 3124 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3125 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3126 3127 Input Parameters: 3128 + mat - matrix 3129 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3130 i.e., mat->rstart <= isrow[i] < mat->rend 3131 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3132 i.e., mat->cstart <= iscol[i] < mat->cend 3133 3134 Output Parameters: 3135 + isrow_d - sequential row index set for retrieving mat->A 3136 . iscol_d - sequential column index set for retrieving mat->A 3137 . iscol_o - sequential column index set for retrieving mat->B 3138 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3139 */ 3140 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3141 { 3142 Vec x, cmap; 3143 const PetscInt *is_idx; 3144 PetscScalar *xarray, *cmaparray; 3145 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3147 Mat B = a->B; 3148 Vec lvec = a->lvec, lcmap; 3149 PetscInt i, cstart, cend, Bn = B->cmap->N; 3150 MPI_Comm comm; 3151 VecScatter Mvctx = a->Mvctx; 3152 3153 PetscFunctionBegin; 3154 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3155 PetscCall(ISGetLocalSize(iscol, &ncols)); 3156 3157 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3158 PetscCall(MatCreateVecs(mat, &x, NULL)); 3159 PetscCall(VecSet(x, -1.0)); 3160 PetscCall(VecDuplicate(x, &cmap)); 3161 PetscCall(VecSet(cmap, -1.0)); 3162 3163 /* Get start indices */ 3164 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3165 isstart -= ncols; 3166 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3167 3168 PetscCall(ISGetIndices(iscol, &is_idx)); 3169 PetscCall(VecGetArray(x, &xarray)); 3170 PetscCall(VecGetArray(cmap, &cmaparray)); 3171 PetscCall(PetscMalloc1(ncols, &idx)); 3172 for (i = 0; i < ncols; i++) { 3173 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3174 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3175 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3176 } 3177 PetscCall(VecRestoreArray(x, &xarray)); 3178 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3179 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3180 3181 /* Get iscol_d */ 3182 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3183 PetscCall(ISGetBlockSize(iscol, &i)); 3184 PetscCall(ISSetBlockSize(*iscol_d, i)); 3185 3186 /* Get isrow_d */ 3187 PetscCall(ISGetLocalSize(isrow, &m)); 3188 rstart = mat->rmap->rstart; 3189 PetscCall(PetscMalloc1(m, &idx)); 3190 PetscCall(ISGetIndices(isrow, &is_idx)); 3191 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3192 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3193 3194 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3195 PetscCall(ISGetBlockSize(isrow, &i)); 3196 PetscCall(ISSetBlockSize(*isrow_d, i)); 3197 3198 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3199 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3200 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3201 3202 PetscCall(VecDuplicate(lvec, &lcmap)); 3203 3204 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3205 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3206 3207 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3208 /* off-process column indices */ 3209 count = 0; 3210 PetscCall(PetscMalloc1(Bn, &idx)); 3211 PetscCall(PetscMalloc1(Bn, &cmap1)); 3212 3213 PetscCall(VecGetArray(lvec, &xarray)); 3214 PetscCall(VecGetArray(lcmap, &cmaparray)); 3215 for (i = 0; i < Bn; i++) { 3216 if (PetscRealPart(xarray[i]) > -1.0) { 3217 idx[count] = i; /* local column index in off-diagonal part B */ 3218 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3219 count++; 3220 } 3221 } 3222 PetscCall(VecRestoreArray(lvec, &xarray)); 3223 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3224 3225 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3226 /* cannot ensure iscol_o has same blocksize as iscol! */ 3227 3228 PetscCall(PetscFree(idx)); 3229 *garray = cmap1; 3230 3231 PetscCall(VecDestroy(&x)); 3232 PetscCall(VecDestroy(&cmap)); 3233 PetscCall(VecDestroy(&lcmap)); 3234 PetscFunctionReturn(PETSC_SUCCESS); 3235 } 3236 3237 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3238 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3239 { 3240 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3241 Mat M = NULL; 3242 MPI_Comm comm; 3243 IS iscol_d, isrow_d, iscol_o; 3244 Mat Asub = NULL, Bsub = NULL; 3245 PetscInt n; 3246 3247 PetscFunctionBegin; 3248 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3249 3250 if (call == MAT_REUSE_MATRIX) { 3251 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3252 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3253 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3254 3255 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3256 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3257 3258 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3259 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3260 3261 /* Update diagonal and off-diagonal portions of submat */ 3262 asub = (Mat_MPIAIJ *)(*submat)->data; 3263 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3264 PetscCall(ISGetLocalSize(iscol_o, &n)); 3265 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3266 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3267 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3268 3269 } else { /* call == MAT_INITIAL_MATRIX) */ 3270 const PetscInt *garray; 3271 PetscInt BsubN; 3272 3273 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3274 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3275 3276 /* Create local submatrices Asub and Bsub */ 3277 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3278 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3279 3280 /* Create submatrix M */ 3281 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3282 3283 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3284 asub = (Mat_MPIAIJ *)M->data; 3285 3286 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3287 n = asub->B->cmap->N; 3288 if (BsubN > n) { 3289 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3290 const PetscInt *idx; 3291 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3292 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3293 3294 PetscCall(PetscMalloc1(n, &idx_new)); 3295 j = 0; 3296 PetscCall(ISGetIndices(iscol_o, &idx)); 3297 for (i = 0; i < n; i++) { 3298 if (j >= BsubN) break; 3299 while (subgarray[i] > garray[j]) j++; 3300 3301 if (subgarray[i] == garray[j]) { 3302 idx_new[i] = idx[j++]; 3303 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3304 } 3305 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3306 3307 PetscCall(ISDestroy(&iscol_o)); 3308 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3309 3310 } else if (BsubN < n) { 3311 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3312 } 3313 3314 PetscCall(PetscFree(garray)); 3315 *submat = M; 3316 3317 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatCreateSubMatrix_MPIAIJ - dispatcher for extracting a parallel submatrix mat[isrow, iscol].

   Chooses among three implementations, cheapest first:
     (a) isrow AND iscol match mat's row/column ownership -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
     (b) only isrow matches mat's row ownership (and the gathered iscol is sorted) -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
     (c) general case -> MatCreateSubMatrix_MPIAIJ_nonscalable() with a sequential IS of global size of iscol.
   On MAT_REUSE_MATRIX the choice made at MAT_INITIAL_MATRIX time is recovered from the
   IS objects composed on *newmat ("isrow_d", "SubIScol", "ISAllGather").
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Recover which algorithm created *newmat from the objects cached on it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all locally requested rows lie inside this process's ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the "same distribution" property must hold on every process to use the fast paths */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* not sorted: fall through to the general (nonscalable) path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* cache the gathered column IS on the new matrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local diagonal-block widths over all processes */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Translate B's column indices in place from B-local numbering to global numbering via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* B must not free the arrays now owned by Bnew when it is destroyed below */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *);

/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts mat[isrow, iscol] when isrow has the same
   row distribution as mat. A sequential submatrix Msub is built locally from a compressed
   column IS (iscol_sub), then its entries are inserted into the parallel result through the
   column map iscmap. Msub, iscol_sub and iscmap are cached on *newmat for MAT_REUSE_MATRIX.
   iscol_local must be sorted (may contain duplicates); it may be NULL on reuse.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the objects cached by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* keep only the requested columns that this process stores (diagonal block, or present in garray);
         k advances monotonically through garray because iscol_local is sorted */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this process's column ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* map Msub's compressed column indices to the submatrix's global columns */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this process's column ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as
request"); 3833 PetscCall(MatZeroEntries(M)); 3834 /* 3835 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3836 rather than the slower MatSetValues(). 3837 */ 3838 M->was_assembled = PETSC_TRUE; 3839 M->assembled = PETSC_FALSE; 3840 } 3841 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3842 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3843 ii = aij->i; 3844 jj = aij->j; 3845 3846 /* trigger copy to CPU if needed */ 3847 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3848 for (i = 0; i < m; i++) { 3849 row = rstart + i; 3850 nz = ii[i + 1] - ii[i]; 3851 cwork = jj; 3852 jj += nz; 3853 vwork = aa; 3854 aa += nz; 3855 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3856 } 3857 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3858 3859 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3860 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3861 *newmat = M; 3862 3863 /* save submatrix used in processor for next request */ 3864 if (call == MAT_INITIAL_MATRIX) { 3865 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3866 PetscCall(MatDestroy(&Mreuse)); 3867 } 3868 PetscFunctionReturn(PETSC_SUCCESS); 3869 } 3870 3871 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3872 { 3873 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3874 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3875 const PetscInt *JJ; 3876 PetscBool nooffprocentries; 3877 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3878 3879 PetscFunctionBegin; 3880 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3881 3882 PetscCall(PetscLayoutSetUp(B->rmap)); 3883 PetscCall(PetscLayoutSetUp(B->cmap)); 3884 m = B->rmap->n; 3885 cstart = B->cmap->rstart; 3886 cend = B->cmap->rend; 3887 rstart = B->rmap->rstart; 3888 3889 PetscCall(PetscCalloc2(m, 
&d_nnz, m, &o_nnz)); 3890 3891 if (PetscDefined(USE_DEBUG)) { 3892 for (i = 0; i < m; i++) { 3893 nnz = Ii[i + 1] - Ii[i]; 3894 JJ = J ? J + Ii[i] : NULL; 3895 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3896 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3897 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3898 } 3899 } 3900 3901 for (i = 0; i < m; i++) { 3902 nnz = Ii[i + 1] - Ii[i]; 3903 JJ = J ? J + Ii[i] : NULL; 3904 nnz_max = PetscMax(nnz_max, nnz); 3905 d = 0; 3906 for (j = 0; j < nnz; j++) { 3907 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3908 } 3909 d_nnz[i] = d; 3910 o_nnz[i] = nnz - d; 3911 } 3912 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3913 PetscCall(PetscFree2(d_nnz, o_nnz)); 3914 3915 for (i = 0; i < m; i++) { 3916 ii = i + rstart; 3917 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J ? J + Ii[i] : NULL, v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3918 } 3919 nooffprocentries = B->nooffprocentries; 3920 B->nooffprocentries = PETSC_TRUE; 3921 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3922 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3923 B->nooffprocentries = nooffprocentries; 3924 3925 /* count number of entries below block diagonal */ 3926 PetscCall(PetscFree(Aij->ld)); 3927 PetscCall(PetscCalloc1(m, &ld)); 3928 Aij->ld = ld; 3929 for (i = 0; i < m; i++) { 3930 nnz = Ii[i + 1] - Ii[i]; 3931 j = 0; 3932 while (j < nnz && J[j] < cstart) j++; 3933 ld[i] = j; 3934 if (J) J += nnz; 3935 } 3936 3937 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3938 PetscFunctionReturn(PETSC_SUCCESS); 3939 } 3940 3941 /*@ 3942 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3943 (the default parallel PETSc format). 3944 3945 Collective 3946 3947 Input Parameters: 3948 + B - the matrix 3949 . i - the indices into j for the start of each local row (starts with zero) 3950 . j - the column indices for each local row (starts with zero) 3951 - v - optional values in the matrix 3952 3953 Level: developer 3954 3955 Notes: 3956 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3957 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3958 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3959 3960 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3961 3962 The format which is used for the sparse matrix input, is equivalent to a 3963 row-major ordering.. 
i.e for the following matrix, the input data expected is
  as shown

.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); no-op if the type does not provide one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B     - the matrix
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation; no-op if the type does not provide one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
  as shown

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* the CSR arrays are copied, not referenced, by the preallocation routine */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
  from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m   - number of local rows (Cannot be `PETSC_DECIDE`)
. n   - This value should be the same as the local size used in creating the
        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
        calculated if N is given) For square matrices n is almost always m.
. M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J   - column indices
- v   - matrix values

  Level: deprecated

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;
  PetscInt       *ld  = Aij->ld;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* Each local CSR row of v is laid out as: ld[i] off-diagonal-block entries that
       come before the diagonal block, the md diagonal-block entries, and then the
       remaining off-diagonal-block entries; split the copy accordingly into ao/ad. */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* all new values are local, so the assembly below need not communicate off-process entries */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v   - matrix values, stored by row

  Level: intermediate

  Note:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  /* NOTE(review): despite its name, Adj is the row-pointer array of the OFF-diagonal block Ao */
  const PetscInt *Adi = Ad->i, *Adj = Ao->i;
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0;
  for (i = 0; i < m; i++) {
    /* Row i of v holds nnz entries ordered as: ld[i] off-diagonal-block entries before
       the diagonal block, the md diagonal-block entries, then the trailing off-diagonal
       entries; split the copy into the ao and ad value arrays accordingly. */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  /* all updated values are local; skip off-process communication during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
         This value should be the same as the local size used in creating the
         y vector for the matrix-vector product y = Ax.
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode            - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
        See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2, etc., where
  m0,m1,m2,.. are the input parameter 'm'. i.e. each processor stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitute the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e. we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e. 34, and
  hence pre-allocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    /* single-process communicator: a sequential matrix is returned and o_nz/o_nnz are not used */
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix

  Synopsis:
  MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameter:
. A - the `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - the diagonal portion of the matrix
. Ao - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr - error code

  Level: advanced

  Note:
  Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
M*/

/*MC
  MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`

  Synopsis:
  MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameters:
+ A  - the `MATMPIAIJ` matrix
. Ad - the diagonal portion of the matrix
. Ao - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr - error code

  Level: advanced

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
M*/

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - The local diagonal block as a `MATSEQAIJ` matrix
. Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.

  Fortran Notes:
  `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (e.g. GPU subclasses) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stack the locally owned rows of the sequential matrix inmat on each rank into a single
   parallel matrix outmat on comm; n is the local column size (or PETSC_DECIDE).
   With MAT_INITIAL_MATRIX the symbolic structure is built first; with MAT_REUSE_MATRIX
   only the numeric values are re-inserted into the existing outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of the local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only one of the two preallocations takes effect, depending on the actual type */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscContainer destroy callback for the Mat_Merge_SeqsToMPI support structure attached
   to matrices created by MatCreateMPIAIJSumSeqAIJSymbolic() */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj were allocated as one contiguous buffer anchored at entry [0] */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of summing per-rank sequential AIJ matrices into the parallel matrix
   mpimat previously created by MatCreateMPIAIJSumSeqAIJSymbolic(); only values are
   exchanged here, the communication pattern is reused from the attached merge struct. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the communication/merge metadata attached during the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    /* send the values of the rows owned by [proc] straight from seqmat's value array */
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge sorted index lists: advance j over bj_i, accumulating where columns match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: determines the parallel nonzero structure obtained by summing the
   per-rank sequential AIJ matrices, and attaches the merge/communication metadata to
   the result for later use by MatCreateMPIAIJSumSeqAIJNumeric(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4829 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4830 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4831 PetscCall(PetscMalloc1(size, &len_si)); 4832 PetscCall(PetscMalloc1(size, &merge->len_s)); 4833 4834 m = merge->rowmap->n; 4835 owners = merge->rowmap->range; 4836 4837 /* determine the number of messages to send, their lengths */ 4838 len_s = merge->len_s; 4839 4840 len = 0; /* length of buf_si[] */ 4841 merge->nsend = 0; 4842 for (proc = 0; proc < size; proc++) { 4843 len_si[proc] = 0; 4844 if (proc == rank) { 4845 len_s[proc] = 0; 4846 } else { 4847 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4848 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4849 } 4850 if (len_s[proc]) { 4851 merge->nsend++; 4852 nrows = 0; 4853 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4854 if (ai[i + 1] > ai[i]) nrows++; 4855 } 4856 len_si[proc] = 2 * (nrows + 1); 4857 len += len_si[proc]; 4858 } 4859 } 4860 4861 /* determine the number and length of messages to receive for ij-structure */ 4862 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4863 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4864 4865 /* post the Irecv of j-structure */ 4866 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4867 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4868 4869 /* post the Isend of j-structure */ 4870 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4871 4872 for (proc = 0, k = 0; proc < size; proc++) { 4873 if (!len_s[proc]) continue; 4874 i = owners[proc]; 4875 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4876 k++; 4877 } 4878 4879 /* receives and sends of j-structure are complete */ 4880 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, 
rj_waits, status)); 4881 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4882 4883 /* send and recv i-structure */ 4884 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4885 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4886 4887 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4888 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4889 for (proc = 0, k = 0; proc < size; proc++) { 4890 if (!len_s[proc]) continue; 4891 /* form outgoing message for i-structure: 4892 buf_si[0]: nrows to be sent 4893 [1:nrows]: row index (global) 4894 [nrows+1:2*nrows+1]: i-structure index 4895 */ 4896 nrows = len_si[proc] / 2 - 1; 4897 buf_si_i = buf_si + nrows + 1; 4898 buf_si[0] = nrows; 4899 buf_si_i[0] = 0; 4900 nrows = 0; 4901 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4902 anzi = ai[i + 1] - ai[i]; 4903 if (anzi) { 4904 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4905 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4906 nrows++; 4907 } 4908 } 4909 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4910 k++; 4911 buf_si += len_si[proc]; 4912 } 4913 4914 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4915 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4916 4917 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4918 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4919 4920 PetscCall(PetscFree(len_si)); 4921 PetscCall(PetscFree(len_ri)); 4922 PetscCall(PetscFree(rj_waits)); 4923 PetscCall(PetscFree2(si_waits, sj_waits)); 4924 PetscCall(PetscFree(ri_waits)); 4925 PetscCall(PetscFree(buf_s)); 4926 PetscCall(PetscFree(status)); 4927 4928 /* compute a local seq matrix in each processor */ 4929 /* allocate bi array and free space 
for accumulating nonzero column info */ 4930 PetscCall(PetscMalloc1(m + 1, &bi)); 4931 bi[0] = 0; 4932 4933 /* create and initialize a linked list */ 4934 nlnk = N + 1; 4935 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4936 4937 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4938 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4939 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4940 4941 current_space = free_space; 4942 4943 /* determine symbolic info for each local row */ 4944 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4945 4946 for (k = 0; k < merge->nrecv; k++) { 4947 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4948 nrows = *buf_ri_k[k]; 4949 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4950 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4951 } 4952 4953 MatPreallocateBegin(comm, m, n, dnz, onz); 4954 len = 0; 4955 for (i = 0; i < m; i++) { 4956 bnzi = 0; 4957 /* add local non-zero cols of this proc's seqmat into lnk */ 4958 arow = owners[rank] + i; 4959 anzi = ai[arow + 1] - ai[arow]; 4960 aj = a->j + ai[arow]; 4961 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4962 bnzi += nlnk; 4963 /* add received col data into lnk */ 4964 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4965 if (i == *nextrow[k]) { /* i-th row */ 4966 anzi = *(nextai[k] + 1) - *nextai[k]; 4967 aj = buf_rj[k] + *nextai[k]; 4968 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4969 bnzi += nlnk; 4970 nextrow[k]++; 4971 nextai[k]++; 4972 } 4973 } 4974 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4975 4976 /* if free space is not available, make more free space */ 4977 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4978 /* copy data into free 
space, then initialize lnk */ 4979 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4980 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4981 4982 current_space->array += bnzi; 4983 current_space->local_used += bnzi; 4984 current_space->local_remaining -= bnzi; 4985 4986 bi[i + 1] = bi[i] + bnzi; 4987 } 4988 4989 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4990 4991 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4992 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4993 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4994 4995 /* create symbolic parallel matrix B_mpi */ 4996 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4997 PetscCall(MatCreate(comm, &B_mpi)); 4998 if (n == PETSC_DECIDE) { 4999 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5000 } else { 5001 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5002 } 5003 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5004 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5005 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5006 MatPreallocateEnd(dnz, onz); 5007 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5008 5009 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5010 B_mpi->assembled = PETSC_FALSE; 5011 merge->bi = bi; 5012 merge->bj = bj; 5013 merge->buf_ri = buf_ri; 5014 merge->buf_rj = buf_rj; 5015 merge->coi = NULL; 5016 merge->coj = NULL; 5017 merge->owners_co = NULL; 5018 5019 PetscCall(PetscCommDestroy(&comm)); 5020 5021 /* attach the supporting struct to B_mpi for reuse */ 5022 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5023 PetscCall(PetscContainerSetPointer(container, merge)); 5024 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5025 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5026 PetscCall(PetscContainerDestroy(&container)); 5027 
*mpimat = B_mpi; 5028 5029 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5030 PetscFunctionReturn(PETSC_SUCCESS); 5031 } 5032 5033 /*@C 5034 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5035 matrices from each processor 5036 5037 Collective 5038 5039 Input Parameters: 5040 + comm - the communicators the parallel matrix will live on 5041 . seqmat - the input sequential matrices 5042 . m - number of local rows (or `PETSC_DECIDE`) 5043 . n - number of local columns (or `PETSC_DECIDE`) 5044 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5045 5046 Output Parameter: 5047 . mpimat - the parallel matrix generated 5048 5049 Level: advanced 5050 5051 Note: 5052 The dimensions of the sequential matrix in each processor MUST be the same. 5053 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5054 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 5055 5056 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5057 @*/ 5058 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5059 { 5060 PetscMPIInt size; 5061 5062 PetscFunctionBegin; 5063 PetscCallMPI(MPI_Comm_size(comm, &size)); 5064 if (size == 1) { 5065 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5066 if (scall == MAT_INITIAL_MATRIX) { 5067 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5068 } else { 5069 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5070 } 5071 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5072 PetscFunctionReturn(PETSC_SUCCESS); 5073 } 5074 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5075 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5076 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5077 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5078 
PetscFunctionReturn(PETSC_SUCCESS); 5079 } 5080 5081 /*@ 5082 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5083 5084 Not Collective 5085 5086 Input Parameter: 5087 . A - the matrix 5088 5089 Output Parameter: 5090 . A_loc - the local sequential matrix generated 5091 5092 Level: developer 5093 5094 Notes: 5095 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5096 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5097 `n` is the global column count obtained with `MatGetSize()` 5098 5099 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5100 5101 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5102 5103 Destroy the matrix with `MatDestroy()` 5104 5105 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5106 @*/ 5107 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5108 { 5109 PetscBool mpi; 5110 5111 PetscFunctionBegin; 5112 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5113 if (mpi) { 5114 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5115 } else { 5116 *A_loc = A; 5117 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5118 } 5119 PetscFunctionReturn(PETSC_SUCCESS); 5120 } 5121 5122 /*@ 5123 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5124 5125 Not Collective 5126 5127 Input Parameters: 5128 + A - the matrix 5129 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5130 5131 Output Parameter: 5132 . 
A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray maps off-diagonal local columns to global columns */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walking pointers; aav/bav keep the originals for the Restore calls below */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds all diagonal-block entries plus all off-diagonal entries of row i */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: global columns below cstart come first (b->j assumed sorted) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local column indices to global with cstart */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining columns at/after the diagonal range */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* sparsity pattern is fixed: refill only the values, in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A (columns below cstart) */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (columns at/after cstart) */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ`
matrix by taking all its local rows and putting them into a sequential matrix with 5257 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5258 5259 Not Collective 5260 5261 Input Parameters: 5262 + A - the matrix 5263 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5264 5265 Output Parameters: 5266 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5267 - A_loc - the local sequential matrix generated 5268 5269 Level: developer 5270 5271 Note: 5272 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5273 part, then those associated with the off-diagonal part (in its local ordering) 5274 5275 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5276 @*/ 5277 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5278 { 5279 Mat Ao, Ad; 5280 const PetscInt *cmap; 5281 PetscMPIInt size; 5282 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5283 5284 PetscFunctionBegin; 5285 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5286 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5287 if (size == 1) { 5288 if (scall == MAT_INITIAL_MATRIX) { 5289 PetscCall(PetscObjectReference((PetscObject)Ad)); 5290 *A_loc = Ad; 5291 } else if (scall == MAT_REUSE_MATRIX) { 5292 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5293 } 5294 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5295 PetscFunctionReturn(PETSC_SUCCESS); 5296 } 5297 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5298 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5299 if (f) { 5300 PetscCall((*f)(A, scall, glob, A_loc)); 5301 } else { 5302 
Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5303 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5304 Mat_SeqAIJ *c; 5305 PetscInt *ai = a->i, *aj = a->j; 5306 PetscInt *bi = b->i, *bj = b->j; 5307 PetscInt *ci, *cj; 5308 const PetscScalar *aa, *ba; 5309 PetscScalar *ca; 5310 PetscInt i, j, am, dn, on; 5311 5312 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5313 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5314 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5315 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5316 if (scall == MAT_INITIAL_MATRIX) { 5317 PetscInt k; 5318 PetscCall(PetscMalloc1(1 + am, &ci)); 5319 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5320 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5321 ci[0] = 0; 5322 for (i = 0, k = 0; i < am; i++) { 5323 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5324 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5325 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5326 /* diagonal portion of A */ 5327 for (j = 0; j < ncols_d; j++, k++) { 5328 cj[k] = *aj++; 5329 ca[k] = *aa++; 5330 } 5331 /* off-diagonal portion of A */ 5332 for (j = 0; j < ncols_o; j++, k++) { 5333 cj[k] = dn + *bj++; 5334 ca[k] = *ba++; 5335 } 5336 } 5337 /* put together the new matrix */ 5338 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5339 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5340 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5341 c = (Mat_SeqAIJ *)(*A_loc)->data; 5342 c->free_a = PETSC_TRUE; 5343 c->free_ij = PETSC_TRUE; 5344 c->nonew = 0; 5345 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5346 } else if (scall == MAT_REUSE_MATRIX) { 5347 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5348 for (i = 0; i < am; i++) { 5349 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5350 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5351 /* diagonal portion of A */ 5352 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5353 /* off-diagonal portion of A */ 5354 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5355 } 5356 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5357 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5358 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5359 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5360 if (glob) { 5361 PetscInt cst, *gidx; 5362 5363 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5364 PetscCall(PetscMalloc1(dn + on, &gidx)); 5365 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5366 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5367 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5368 } 5369 } 5370 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5371 PetscFunctionReturn(PETSC_SUCCESS); 5372 } 5373 5374 /*@C 5375 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5376 5377 Not Collective 5378 5379 Input Parameters: 5380 + A - the matrix 5381 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5382 . row - index set of rows to extract (or `NULL`) 5383 - col - index set of columns to extract (or `NULL`) 5384 5385 Output Parameter: 5386 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* no row IS supplied: take all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* no column IS supplied: take the nonzero columns — off-diagonal global columns
       below the diagonal range, then the diagonal-block columns, then the rest,
       yielding ascending global order (garray assumed sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once a row index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ             *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ             *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt                plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt             owner;
  PetscSFNode            *iremote, *oiremote;
  const PetscInt         *lrowindices;
  PetscSF                 sf, osf;
  PetscInt                pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt                ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm                comm;
  ISLocalToGlobalMapping  mapping;
  const PetscScalar      *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root (row) counts and running offsets, stored pairwise: [diag, off-diag] */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we have the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* ilocal/oilocal interleave so diag and off-diag entries land in disjoint slots of the
     single CSR value array of P_oth — this is what lets the two SF bcasts below overlap */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (undone below, so P is left unchanged) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* restore po->j to local indices; every global index must map back (IS_GTOLM_DROP must drop nothing) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof > 1 collapses blocks of columns onto one key (MAIJ) */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* extract the unique keys and sort them to get the row IS for the submatrix */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A     - the first matrix in `MATMPIAIJ` format
. B     - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.
 colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* A's local column ownership must line up with B's local row ownership for A*B to make sense */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the global row index set of B as: off-diagonal columns of A below the
       local range, then the local (diagonal) columns, then off-diagonal columns above.
       a->garray maps local off-diagonal column indices to global columns and is sorted,
       which is what makes the three-segment construction below valid. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal entry at or beyond the local range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    /* PETSC_OWN_POINTER: isrowb takes ownership of idx; do not free idx here */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    /* All columns of B are kept */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    /* Reuse: caller must supply the index sets and the previously created B_seq */
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices expects an array of matrices; wrap the existing one */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller if requested, otherwise destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5786 5787 Level: developer 5788 5789 */ 5790 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5791 { 5792 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5793 Mat_SeqAIJ *b_oth; 5794 VecScatter ctx; 5795 MPI_Comm comm; 5796 const PetscMPIInt *rprocs, *sprocs; 5797 const PetscInt *srow, *rstarts, *sstarts; 5798 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5799 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5800 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5801 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5802 PetscMPIInt size, tag, rank, nreqs; 5803 5804 PetscFunctionBegin; 5805 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5806 PetscCallMPI(MPI_Comm_size(comm, &size)); 5807 5808 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5809 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5810 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5811 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5812 5813 if (size == 1) { 5814 startsj_s = NULL; 5815 bufa_ptr = NULL; 5816 *B_oth = NULL; 5817 PetscFunctionReturn(PETSC_SUCCESS); 5818 } 5819 5820 ctx = a->Mvctx; 5821 tag = ((PetscObject)ctx)->tag; 5822 5823 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5824 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5825 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5826 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5827 PetscCall(PetscMalloc1(nreqs, &reqs)); 5828 rwaits = reqs; 5829 swaits = reqs + nrecvs; 5830 5831 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5832 if (scall == MAT_INITIAL_MATRIX) { 5833 /* i-array */ 5834 /* post receives */ 5835 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5836 for (i = 0; i < nrecvs; i++) { 5837 rowlen = rvalues + rstarts[i] * rbs; 5838 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5839 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5840 } 5841 5842 /* pack the outgoing message */ 5843 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5844 5845 sstartsj[0] = 0; 5846 rstartsj[0] = 0; 5847 len = 0; /* total length of j or a array to be sent */ 5848 if (nsends) { 5849 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5850 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5851 } 5852 for (i = 0; i < nsends; i++) { 5853 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5854 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5855 for (j = 0; j < nrows; j++) { 5856 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5857 for (l = 0; l < sbs; l++) { 5858 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5859 5860 rowlen[j * sbs + l] = ncols; 5861 5862 len += ncols; 5863 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5864 } 5865 k++; 5866 } 5867 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5868 5869 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5870 } 5871 /* recvs and sends of i-array are completed */ 5872 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5873 PetscCall(PetscFree(svalues)); 
5874 5875 /* allocate buffers for sending j and a arrays */ 5876 PetscCall(PetscMalloc1(len + 1, &bufj)); 5877 PetscCall(PetscMalloc1(len + 1, &bufa)); 5878 5879 /* create i-array of B_oth */ 5880 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5881 5882 b_othi[0] = 0; 5883 len = 0; /* total length of j or a array to be received */ 5884 k = 0; 5885 for (i = 0; i < nrecvs; i++) { 5886 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5887 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5888 for (j = 0; j < nrows; j++) { 5889 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5890 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5891 k++; 5892 } 5893 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5894 } 5895 PetscCall(PetscFree(rvalues)); 5896 5897 /* allocate space for j and a arrays of B_oth */ 5898 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5899 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5900 5901 /* j-array */ 5902 /* post receives of j-array */ 5903 for (i = 0; i < nrecvs; i++) { 5904 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5905 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5906 } 5907 5908 /* pack the outgoing message j-array */ 5909 if (nsends) k = sstarts[0]; 5910 for (i = 0; i < nsends; i++) { 5911 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5912 bufJ = bufj + sstartsj[i]; 5913 for (j = 0; j < nrows; j++) { 5914 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5915 for (ll = 0; ll < sbs; ll++) { 5916 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5917 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5918 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5919 } 5920 } 5921 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5922 } 5923 5924 /* recvs and sends of j-array are 
completed */ 5925 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5926 } else if (scall == MAT_REUSE_MATRIX) { 5927 sstartsj = *startsj_s; 5928 rstartsj = *startsj_r; 5929 bufa = *bufa_ptr; 5930 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5931 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5932 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5933 5934 /* a-array */ 5935 /* post receives of a-array */ 5936 for (i = 0; i < nrecvs; i++) { 5937 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5938 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5939 } 5940 5941 /* pack the outgoing message a-array */ 5942 if (nsends) k = sstarts[0]; 5943 for (i = 0; i < nsends; i++) { 5944 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5945 bufA = bufa + sstartsj[i]; 5946 for (j = 0; j < nrows; j++) { 5947 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5948 for (ll = 0; ll < sbs; ll++) { 5949 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5950 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5951 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5952 } 5953 } 5954 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5955 } 5956 /* recvs and sends of a-array are completed */ 5957 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5958 PetscCall(PetscFree(reqs)); 5959 5960 if (scall == MAT_INITIAL_MATRIX) { 5961 /* put together the new matrix */ 5962 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5963 5964 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5965 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5966 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5967 b_oth->free_a = PETSC_TRUE; 5968 b_oth->free_ij = PETSC_TRUE; 5969 b_oth->nonew = 0; 5970 5971 PetscCall(PetscFree(bufj)); 5972 if (!startsj_s || !bufa_ptr) { 5973 PetscCall(PetscFree2(sstartsj, rstartsj)); 5974 PetscCall(PetscFree(bufa_ptr)); 5975 } else { 5976 *startsj_s = sstartsj; 5977 *startsj_r = rstartsj; 5978 *bufa_ptr = bufa; 5979 } 5980 } else if (scall == MAT_REUSE_MATRIX) { 5981 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5982 } 5983 5984 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5985 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5986 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5987 PetscFunctionReturn(PETSC_SUCCESS); 5988 } 5989 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5993 #if defined(PETSC_HAVE_MKL_SPARSE) 5994 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5995 #endif 5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5997 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5998 #if defined(PETSC_HAVE_ELEMENTAL) 5999 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6000 #endif 6001 #if defined(PETSC_HAVE_SCALAPACK) 6002 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6003 #endif 6004 #if defined(PETSC_HAVE_HYPRE) 6005 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6006 #endif 6007 #if defined(PETSC_HAVE_CUDA) 6008 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
   Computes (B'*A')' since computing B*A directly is untenable

           n                 p                p
     [          ]     [          ]     [          ]
   m [    A     ]  * n[    B     ] = m [    C     ]
     [          ]     [          ]     [          ]

*/
/* Numeric phase of C = A*B for A MPIDENSE, B MPIAIJ: form C by transposing the product Bt*At */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  /* Ct = Bt*At = (A*B)' */
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Transpose Ct back into the caller-provided C (reuse requires a precursor) */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: size/type C and install the numeric routine; `fill` is unused here */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already some dense variant; otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Wire up the AB product path for MPIDense*MPIAIJ after checking layout compatibility */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch for MPIDense*MPIAIJ products; only MATPRODUCT_AB is supported here */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

  j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
  j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

  mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

  For Set1, j1[] contains column indices of the nonzeros.
  For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
  respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
  but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

  Similar for Set2.
  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

  i[],j[]: the CSR of the merged matrix, which has m rows.
  imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
  imap2[]: similar to imap1[], but for Set2.
  Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the two sorted (with repeats) column lists of row r;
       jmap advances b1/b2 past all repeats of the current unique nonzero in one step */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Set1's nonzero is smaller: it alone becomes the next merged entry */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Set2's nonzero is smaller */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix ends at t */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
  mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
  n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
  respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

  i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
  i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
  j[],perm[]: the routine needs to sort j[] within each row along with perm[].
  rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
  They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
  and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

  Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
  Atot: number of entries belonging to the diagonal block.
  Annz: number of unique nonzeros belonging to the diagonal block.
  Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
  repeats (i.e., same 'i,j' pair).
  Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
  More precisely, Ajmap[t+1] - Ajmap[t]
  is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

  Atot: number of entries belonging to the diagonal block
  Annz: number of unique nonzeros belonging to the diagonal block.

  Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

  Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (entries flagged to be ignored; i[] is sorted so they are all at the front) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); /* NOTE(review): <= N admits j == N; valid columns are 0..N-1 — confirm the intended bound */
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets in the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
  nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
  nnz: number of unique nonzeros in the merged matrix
  imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
  jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
  jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[]; PetscCount is signed so k >= 0 terminates */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0]; /* positions before the first mapped nonzero get the base offset */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the COO assembly state attached to an MPIAIJ matrix; frees the SF and all index/permutation arrays */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Reset any existing off-diagonal mapping/scatter state before rebuilding from COO input */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart,
&rend)); 6395 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6396 PetscCall(MatGetLocalSize(mat, &m, &n)); 6397 PetscCall(MatGetSize(mat, &M, &N)); 6398 6399 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6400 /* entries come first, then local rows, then remote rows. */ 6401 PetscCount n1 = coo_n, *perm1; 6402 PetscInt *i1 = coo_i, *j1 = coo_j; 6403 6404 PetscCall(PetscMalloc1(n1, &perm1)); 6405 for (k = 0; k < n1; k++) perm1[k] = k; 6406 6407 /* Manipulate indices so that entries with negative row or col indices will have smallest 6408 row indices, local entries will have greater but negative row indices, and remote entries 6409 will have positive row indices. 6410 */ 6411 for (k = 0; k < n1; k++) { 6412 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6413 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6414 else { 6415 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6416 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6417 } 6418 } 6419 6420 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6421 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6422 6423 /* Advance k to the first entry we need to take care of */ 6424 for (k = 0; k < n1; k++) 6425 if (i1[k] > PETSC_MIN_INT) break; 6426 PetscInt i1start = k; 6427 6428 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6429 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6430 6431 /* Send remote rows to their owner */ 6432 /* Find which rows should be sent to which remote ranks*/ 6433 PetscInt 
nsend = 0; /* Number of MPI ranks to send data to */ 6434 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6435 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6436 const PetscInt *ranges; 6437 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6438 6439 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6440 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6441 for (k = rem; k < n1;) { 6442 PetscMPIInt owner; 6443 PetscInt firstRow, lastRow; 6444 6445 /* Locate a row range */ 6446 firstRow = i1[k]; /* first row of this owner */ 6447 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6448 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6449 6450 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6451 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6452 6453 /* All entries in [k,p) belong to this remote owner */ 6454 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6455 PetscMPIInt *sendto2; 6456 PetscInt *nentries2; 6457 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6458 6459 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6460 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6461 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6462 PetscCall(PetscFree2(sendto, nentries2)); 6463 sendto = sendto2; 6464 nentries = nentries2; 6465 maxNsend = maxNsend2; 6466 } 6467 sendto[nsend] = owner; 6468 nentries[nsend] = p - k; 6469 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6470 nsend++; 6471 k = p; 6472 } 6473 6474 /* Build 1st SF to know offsets on remote to send data */ 6475 PetscSF sf1; 6476 PetscInt nroots = 1, nroots2 = 0; 6477 PetscInt nleaves = nsend, nleaves2 = 0; 6478 PetscInt *offsets; 6479 PetscSFNode *iremote; 6480 6481 PetscCall(PetscSFCreate(comm, &sf1)); 6482 PetscCall(PetscMalloc1(nsend, &iremote)); 6483 PetscCall(PetscMalloc1(nsend, &offsets)); 6484 for (k = 0; k < nsend; k++) { 6485 iremote[k].rank = sendto[k]; 6486 iremote[k].index = 0; 6487 nleaves2 += nentries[k]; 6488 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6489 } 6490 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6491 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6492 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6493 PetscCall(PetscSFDestroy(&sf1)); 6494 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6495 6496 /* Build 2nd SF to send remote COOs to their owner */ 6497 PetscSF sf2; 6498 nroots = nroots2; 6499 nleaves = nleaves2; 6500 PetscCall(PetscSFCreate(comm, &sf2)); 6501 
PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  /* Leaves of sf2 are this rank's remote COO entries (in sorted order); each points at a
     contiguous slot [offsets[k], offsets[k]+nentries[k]) in owner sendto[k]'s receive space */
  for (k = 0; k < nsend; k++) {
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank  = sendto[k];
      iremote[p].index = offsets[k] + q;
    }
  }
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* Send the remote COOs to their owner */
  PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));

  PetscCall(PetscFree(offsets));
  PetscCall(PetscFree2(sendto, nentries));

  /* Sort received COOs by row along with the permutation array */
  for (k = 0; k < n2; k++) perm2[k] = k;
  PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));

  /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
  PetscCount *Cperm1;
  PetscCall(PetscMalloc1(nleaves, &Cperm1));
  PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));

  /* Support for HYPRE matrices, kind of a hack.
     Swap min column with diagonal so that diagonal values will go first */
  PetscBool   hypre;
  const char *name;
  PetscCall(PetscObjectGetName((PetscObject)mat, &name));
  PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
  if (hypre) {
    PetscInt *minj;
    PetscBT   hasdiag;

    /* Pass 1 (local + received entries): record which local rows have a diagonal entry and the
       minimum diag-block column per row */
    PetscCall(PetscBTCreate(m, &hasdiag));
    PetscCall(PetscMalloc1(m, &minj));
    for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
    for (k = i1start; k < rem; k++) {
      if (j1[k] < cstart || j1[k] >= cend) continue;
      const PetscInt rindex = i1[k] - rstart;
      if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
      minj[rindex] = PetscMin(minj[rindex], j1[k]);
    }
    for (k = 0; k < n2; k++) {
      if (j2[k] < cstart || j2[k] >= cend) continue;
      const PetscInt rindex = i2[k] - rstart;
      if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
      minj[rindex] = PetscMin(minj[rindex], j2[k]);
    }
    /* Pass 2: in rows that have a diagonal, swap the column index of the min-column entry with
       the diagonal entry so the diagonal sorts first within the row */
    for (k = i1start; k < rem; k++) {
      const PetscInt rindex = i1[k] - rstart;
      if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
      if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
      else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
    }
    for (k = 0; k < n2; k++) {
      const PetscInt rindex = i2[k] - rstart;
      if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
      if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
      else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
    }
    PetscCall(PetscBTDestroy(&hasdiag));
    PetscCall(PetscFree(minj));
  }

  /* Split local COOs and received COOs into diag/offdiag portions */
  PetscCount *rowBegin1, *rowMid1, *rowEnd1;
  PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
  PetscCount  Annz1, Bnnz1, Atot1, Btot1;
  PetscCount *rowBegin2, *rowMid2, *rowEnd2;
  PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
  PetscCount  Annz2, Bnnz2, Atot2, Btot2;

  PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
  PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
  PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
  PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));

  /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
  PetscInt *Ai, *Bi;
  PetscInt *Aj, *Bj;

  PetscCall(PetscMalloc1(m + 1, &Ai));
  PetscCall(PetscMalloc1(m + 1, &Bi));
  PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
  PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));

  PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
  PetscCall(PetscMalloc1(Annz1, &Aimap1));
  PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
  PetscCall(PetscMalloc1(Annz2, &Aimap2));
  PetscCall(PetscMalloc1(Bnnz2, &Bimap2));

  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));

  /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */
  /* expect nonzeros in A/B most likely have local contributing entries */
  PetscInt    Annz = Ai[m];
  PetscInt    Bnnz = Bi[m];
  PetscCount *Ajmap1_new, *Bjmap1_new;

  PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));

  PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
  PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));

  /* The per-local-entry maps have been folded into the expanded ones; free intermediates */
  PetscCall(PetscFree(Aimap1));
  PetscCall(PetscFree(Ajmap1));
  PetscCall(PetscFree(Bimap1));
  PetscCall(PetscFree(Bjmap1));
  PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
  PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
  PetscCall(PetscFree(perm1));
  PetscCall(PetscFree3(i2, j2, perm2));

  Ajmap1 = Ajmap1_new;
  Bjmap1 = Bjmap1_new;

  /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
  if (Annz < Annz1 + Annz2) {
    PetscInt *Aj_new;
    PetscCall(PetscMalloc1(Annz, &Aj_new));
    PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
    PetscCall(PetscFree(Aj));
    Aj = Aj_new;
  }

  if (Bnnz < Bnnz1 + Bnnz2) {
    PetscInt *Bj_new;
    PetscCall(PetscMalloc1(Bnnz, &Bj_new));
    PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
    PetscCall(PetscFree(Bj));
    Bj = Bj_new;
  }

  /* Create new submatrices for on-process and off-process coupling */
  PetscScalar     *Aa, *Ba;
  MatType          rtype;
  Mat_SeqAIJ      *a, *b;
  PetscObjectState state;
  PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
  PetscCall(PetscCalloc1(Bnnz, &Ba));
  /* make Aj[] local, i.e, based off the start column of the diagonal portion */
  if (cstart) {
    for (k = 0; k < Annz; k++) Aj[k] -= cstart;
  }
  PetscCall(MatDestroy(&mpiaij->A));
  PetscCall(MatDestroy(&mpiaij->B));
  PetscCall(MatGetRootType_Private(mat, &rtype));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
  PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
  state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
  PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));

  a = (Mat_SeqAIJ *)mpiaij->A->data;
  b = (Mat_SeqAIJ *)mpiaij->B->data;
  a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
  a->free_a  = b->free_a  = PETSC_TRUE;
  a->free_ij = b->free_ij = PETSC_TRUE;

  /* conversion must happen AFTER multiply setup */
  PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
  PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
  PetscCall(VecDestroy(&mpiaij->lvec));
  PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));

  // Put the COO struct in a container and then attach that to the matrix
  PetscCall(PetscMalloc1(1, &coo));
  coo->n       = coo_n;
  coo->sf      = sf2;
  coo->sendlen = nleaves;
  coo->recvlen = nroots;
  coo->Annz    = Annz;
  coo->Bnnz    = Bnnz;
  coo->Annz2   = Annz2;
  coo->Bnnz2   = Bnnz2;
  coo->Atot1   = Atot1;
  coo->Atot2   = Atot2;
  coo->Btot1   = Btot1;
  coo->Btot2   = Btot2;
  coo->Ajmap1  = Ajmap1;
  coo->Aperm1  = Aperm1;
  coo->Bjmap1  = Bjmap1;
  coo->Bperm1  = Bperm1;
  coo->Aimap2  = Aimap2;
  coo->Ajmap2  = Ajmap2;
  coo->Aperm2  = Aperm2;
  coo->Bimap2  = Bimap2;
  coo->Bjmap2  = Bjmap2;
  coo->Bperm2  = Bperm2;
  coo->Cperm1  = Cperm1;
  // Allocate in preallocation.
If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add COO values v[] (ordered as in the user's MatSetPreallocationCOO() call) into the
   diag (A) and offdiag (B) blocks, using the maps precomputed in MatSetPreallocationCOO_MPIAIJ().
   Remote entries are shipped to their owners via coo->sf, overlapped with the local summation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Fetch the COO struct attached by MatSetPreallocationCOO_MPIAIJ(); error if the user never
     called preallocation */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; always '+=' here since the nonzeros were already
     initialized by the local pass above */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
/* Type constructor for MATMPIAIJ: installs the ops table, the stash for off-process entries,
   and the dispatch functions (MatConvert_*, preallocation, COO, products) queried by name */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built lazily (hash table or array) */
  b->garray      = NULL; /* global indices of the off-diagonal (B) columns */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.
n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The `i` and `j` indices are 0 based

   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
   communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark preallocated so assembly below does not try to allocate; the user arrays are adopted,
     not copied (see Notes) */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local entries exist by construction, so suppress the off-process stash communication
     during this assembly, then restore the option for later user assemblies */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Backend data shared by the MPIAIJ matrix-product implementations below */
typedef struct {
  Mat      *mp; /* intermediate products */
  PetscBool
*mptmp;       /* is the intermediate product temporary ? */
  PetscInt cp;            /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Product-data destructor installed on C->product->data; releases everything allocated by
   MatProductSymbolic_MPIAIJBACKEND(). Note coo_v/coo_w were allocated through the SF with
   mtype, so they must be released with PetscSFFree() BEFORE the SF itself is destroyed. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single backing allocations the own[i]/off[i] pointers slice into */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-aware) implementation when the subtype provides one */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase for the backend product: recompute the intermediate products, gather their
   values into the COO buffers (coo_v on-process, coo_w off-process), communicate the
   off-process part, and insert everything into C via MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first numeric call after an api_user symbolic may reuse; later calls must update */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o are running offsets into coo_v (on-process) and coo_w (off-process) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later products, not C directly */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* No off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7099 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7100 /* a base offset; type-2: sparse with a local to global map table */ 7101 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7102 7103 MatProductType ptype; 7104 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7105 PetscMPIInt size; 7106 7107 PetscFunctionBegin; 7108 MatCheckProduct(C, 1); 7109 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7110 ptype = product->type; 7111 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7112 ptype = MATPRODUCT_AB; 7113 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7114 } 7115 switch (ptype) { 7116 case MATPRODUCT_AB: 7117 A = product->A; 7118 P = product->B; 7119 m = A->rmap->n; 7120 n = P->cmap->n; 7121 M = A->rmap->N; 7122 N = P->cmap->N; 7123 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7124 break; 7125 case MATPRODUCT_AtB: 7126 P = product->A; 7127 A = product->B; 7128 m = P->cmap->n; 7129 n = A->cmap->n; 7130 M = P->cmap->N; 7131 N = A->cmap->N; 7132 hasoffproc = PETSC_TRUE; 7133 break; 7134 case MATPRODUCT_PtAP: 7135 A = product->A; 7136 P = product->B; 7137 m = P->cmap->n; 7138 n = P->cmap->n; 7139 M = P->cmap->N; 7140 N = P->cmap->N; 7141 hasoffproc = PETSC_TRUE; 7142 break; 7143 default: 7144 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7145 } 7146 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7147 if (size == 1) hasoffproc = PETSC_FALSE; 7148 7149 /* defaults */ 7150 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7151 mp[i] = NULL; 7152 mptmp[i] = PETSC_FALSE; 7153 rmapt[i] = -1; 7154 cmapt[i] = -1; 7155 rmapa[i] = NULL; 7156 cmapa[i] = NULL; 7157 } 7158 7159 /* customization */ 
7160 PetscCall(PetscNew(&mmdata)); 7161 mmdata->reusesym = product->api_user; 7162 if (ptype == MATPRODUCT_AB) { 7163 if (product->api_user) { 7164 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7165 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7166 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7167 PetscOptionsEnd(); 7168 } else { 7169 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7170 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7171 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7172 PetscOptionsEnd(); 7173 } 7174 } else if (ptype == MATPRODUCT_PtAP) { 7175 if (product->api_user) { 7176 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7177 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7178 PetscOptionsEnd(); 7179 } else { 7180 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7181 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7182 PetscOptionsEnd(); 7183 } 7184 } 7185 a = (Mat_MPIAIJ *)A->data; 7186 p = (Mat_MPIAIJ *)P->data; 7187 PetscCall(MatSetSizes(C, m, n, M, N)); 7188 PetscCall(PetscLayoutSetUp(C->rmap)); 7189 PetscCall(PetscLayoutSetUp(C->cmap)); 7190 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7191 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7192 7193 cp = 0; 7194 switch (ptype) { 7195 case MATPRODUCT_AB: /* A * P */ 7196 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7197 7198 /* A_diag * P_local (merged or not) */ 7199 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7200 /* P is product->B */ 7201 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7202 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7203 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7204 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7205 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7206 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7207 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7208 mp[cp]->product->api_user = product->api_user; 7209 PetscCall(MatProductSetFromOptions(mp[cp])); 7210 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7211 PetscCall(ISGetIndices(glob, &globidx)); 7212 rmapt[cp] = 1; 7213 cmapt[cp] = 2; 7214 cmapa[cp] = globidx; 7215 mptmp[cp] = PETSC_FALSE; 7216 cp++; 7217 } else { /* A_diag * P_diag and A_diag * P_off */ 7218 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7219 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7220 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7221 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7222 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7223 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7224 mp[cp]->product->api_user = product->api_user; 7225 PetscCall(MatProductSetFromOptions(mp[cp])); 7226 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7227 rmapt[cp] = 1; 7228 cmapt[cp] = 1; 7229 mptmp[cp] = PETSC_FALSE; 7230 cp++; 7231 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7232 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7233 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7234 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7235 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7236 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7237 mp[cp]->product->api_user = product->api_user; 7238 PetscCall(MatProductSetFromOptions(mp[cp])); 7239 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7240 rmapt[cp] = 1; 7241 cmapt[cp] = 2; 7242 cmapa[cp] = p->garray; 7243 mptmp[cp] = PETSC_FALSE; 7244 cp++; 7245 } 7246 7247 /* A_off * P_other */ 7248 if (mmdata->P_oth) { 7249 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7250 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7251 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7252 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7253 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7254 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7255 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7256 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7257 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7258 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7259 mp[cp]->product->api_user = product->api_user; 7260 PetscCall(MatProductSetFromOptions(mp[cp])); 7261 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7262 rmapt[cp] = 1; 7263 cmapt[cp] = 2; 7264 cmapa[cp] = P_oth_idx; 7265 mptmp[cp] = PETSC_FALSE; 7266 cp++; 7267 } 7268 break; 7269 7270 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7271 /* A is product->B */ 7272 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7273 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7274 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7275 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7276 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7277 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7278 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7279 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7280 mp[cp]->product->api_user = product->api_user; 7281 PetscCall(MatProductSetFromOptions(mp[cp])); 7282 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7283 PetscCall(ISGetIndices(glob, &globidx)); 7284 rmapt[cp] = 2; 7285 rmapa[cp] = globidx; 7286 cmapt[cp] = 2; 7287 cmapa[cp] = globidx; 7288 mptmp[cp] = PETSC_FALSE; 7289 cp++; 7290 } else { 7291 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7292 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7293 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7294 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7295 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7296 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7297 mp[cp]->product->api_user = product->api_user; 7298 PetscCall(MatProductSetFromOptions(mp[cp])); 7299 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7300 PetscCall(ISGetIndices(glob, &globidx)); 7301 rmapt[cp] = 1; 7302 cmapt[cp] = 2; 7303 cmapa[cp] = globidx; 7304 mptmp[cp] = PETSC_FALSE; 7305 cp++; 7306 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7307 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7308 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7309 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7310 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7311 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7312 mp[cp]->product->api_user = product->api_user; 7313 PetscCall(MatProductSetFromOptions(mp[cp])); 7314 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7315 rmapt[cp] = 2; 7316 rmapa[cp] = p->garray; 
7317 cmapt[cp] = 2; 7318 cmapa[cp] = globidx; 7319 mptmp[cp] = PETSC_FALSE; 7320 cp++; 7321 } 7322 break; 7323 case MATPRODUCT_PtAP: 7324 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7325 /* P is product->B */ 7326 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7327 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7328 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7329 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7330 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7331 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7332 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7333 mp[cp]->product->api_user = product->api_user; 7334 PetscCall(MatProductSetFromOptions(mp[cp])); 7335 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7336 PetscCall(ISGetIndices(glob, &globidx)); 7337 rmapt[cp] = 2; 7338 rmapa[cp] = globidx; 7339 cmapt[cp] = 2; 7340 cmapa[cp] = globidx; 7341 mptmp[cp] = PETSC_FALSE; 7342 cp++; 7343 if (mmdata->P_oth) { 7344 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7345 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7346 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7347 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7348 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7349 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7350 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7351 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7352 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7353 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7354 mp[cp]->product->api_user = product->api_user; 7355 PetscCall(MatProductSetFromOptions(mp[cp])); 7356 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7357 
mptmp[cp] = PETSC_TRUE; 7358 cp++; 7359 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7360 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7361 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7362 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7363 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7364 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7365 mp[cp]->product->api_user = product->api_user; 7366 PetscCall(MatProductSetFromOptions(mp[cp])); 7367 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7368 rmapt[cp] = 2; 7369 rmapa[cp] = globidx; 7370 cmapt[cp] = 2; 7371 cmapa[cp] = P_oth_idx; 7372 mptmp[cp] = PETSC_FALSE; 7373 cp++; 7374 } 7375 break; 7376 default: 7377 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7378 } 7379 /* sanity check */ 7380 if (size > 1) 7381 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7382 7383 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7384 for (i = 0; i < cp; i++) { 7385 mmdata->mp[i] = mp[i]; 7386 mmdata->mptmp[i] = mptmp[i]; 7387 } 7388 mmdata->cp = cp; 7389 C->product->data = mmdata; 7390 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7391 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7392 7393 /* memory type */ 7394 mmdata->mtype = PETSC_MEMTYPE_HOST; 7395 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7396 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7397 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7398 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7399 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7400 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7401 
7402 /* prepare coo coordinates for values insertion */ 7403 7404 /* count total nonzeros of those intermediate seqaij Mats 7405 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7406 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7407 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7408 */ 7409 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7410 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7411 if (mptmp[cp]) continue; 7412 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7413 const PetscInt *rmap = rmapa[cp]; 7414 const PetscInt mr = mp[cp]->rmap->n; 7415 const PetscInt rs = C->rmap->rstart; 7416 const PetscInt re = C->rmap->rend; 7417 const PetscInt *ii = mm->i; 7418 for (i = 0; i < mr; i++) { 7419 const PetscInt gr = rmap[i]; 7420 const PetscInt nz = ii[i + 1] - ii[i]; 7421 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7422 else ncoo_oown += nz; /* this row is local */ 7423 } 7424 } else ncoo_d += mm->nz; 7425 } 7426 7427 /* 7428 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7429 7430 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7431 7432 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7433 7434 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7435 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7436 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7437 7438 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7439 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7440 */ 7441 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7442 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7443 7444 /* gather (i,j) of nonzeros inserted by remote procs */ 7445 if (hasoffproc) { 7446 PetscSF msf; 7447 PetscInt ncoo2, *coo_i2, *coo_j2; 7448 7449 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7450 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7451 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7452 7453 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7454 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7455 PetscInt *idxoff = mmdata->off[cp]; 7456 PetscInt *idxown = mmdata->own[cp]; 7457 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7458 const PetscInt *rmap = rmapa[cp]; 7459 const PetscInt *cmap = cmapa[cp]; 7460 const PetscInt *ii = mm->i; 7461 PetscInt *coi = coo_i + ncoo_o; 7462 PetscInt *coj = coo_j + ncoo_o; 7463 const PetscInt mr = mp[cp]->rmap->n; 7464 const PetscInt rs = C->rmap->rstart; 7465 const PetscInt re = C->rmap->rend; 7466 const PetscInt cs = C->cmap->rstart; 7467 for (i = 0; i < mr; i++) { 7468 const PetscInt *jj = mm->j + ii[i]; 7469 const PetscInt gr = rmap[i]; 7470 const PetscInt nz = ii[i + 1] - ii[i]; 7471 if (gr < rs || gr >= re) { /* this is an offproc row */ 7472 for (j = ii[i]; j < ii[i + 1]; j++) { 7473 *coi++ = gr; 7474 *idxoff++ = j; 7475 } 7476 if (!cmapt[cp]) { /* already global */ 7477 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7478 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7479 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7480 } else { /* offdiag */ 7481 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7482 } 7483 ncoo_o += nz; 7484 } else { /* this is a local row */ 7485 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7486 } 7487 } 7488 } 7489 mmdata->off[cp + 1] = idxoff; 7490 mmdata->own[cp + 1] = idxown; 7491 } 7492 7493 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7494 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7495 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7496 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7497 ncoo = ncoo_d + ncoo_oown + ncoo2; 7498 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7499 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7500 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7501 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7502 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7503 PetscCall(PetscFree2(coo_i, coo_j)); 7504 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7505 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7506 coo_i = coo_i2; 7507 coo_j = coo_j2; 7508 } else { /* no offproc values insertion */ 7509 ncoo = ncoo_d; 7510 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7511 7512 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7513 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7514 PetscCall(PetscSFSetUp(mmdata->sf)); 7515 } 7516 mmdata->hasoffproc = hasoffproc; 7517 7518 /* gather (i,j) of nonzeros inserted locally */ 7519 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7520 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7521 PetscInt *coi = coo_i + ncoo_d; 7522 PetscInt *coj = coo_j + ncoo_d; 7523 const PetscInt *jj = mm->j; 7524 const PetscInt *ii = mm->i; 7525 const PetscInt *cmap = 
cmapa[cp]; 7526 const PetscInt *rmap = rmapa[cp]; 7527 const PetscInt mr = mp[cp]->rmap->n; 7528 const PetscInt rs = C->rmap->rstart; 7529 const PetscInt re = C->rmap->rend; 7530 const PetscInt cs = C->cmap->rstart; 7531 7532 if (mptmp[cp]) continue; 7533 if (rmapt[cp] == 1) { /* consecutive rows */ 7534 /* fill coo_i */ 7535 for (i = 0; i < mr; i++) { 7536 const PetscInt gr = i + rs; 7537 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7538 } 7539 /* fill coo_j */ 7540 if (!cmapt[cp]) { /* type-0, already global */ 7541 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7542 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7543 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7544 } else { /* type-2, local to global for sparse columns */ 7545 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7546 } 7547 ncoo_d += mm->nz; 7548 } else if (rmapt[cp] == 2) { /* sparse rows */ 7549 for (i = 0; i < mr; i++) { 7550 const PetscInt *jj = mm->j + ii[i]; 7551 const PetscInt gr = rmap[i]; 7552 const PetscInt nz = ii[i + 1] - ii[i]; 7553 if (gr >= rs && gr < re) { /* local rows */ 7554 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7555 if (!cmapt[cp]) { /* type-0, already global */ 7556 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7557 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7558 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7559 } else { /* type-2, local to global for sparse columns */ 7560 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7561 } 7562 ncoo_d += nz; 7563 } 7564 } 7565 } 7566 } 7567 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7568 PetscCall(ISDestroy(&glob)); 7569 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7570 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7571 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7572 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
/*
  MatProductSetFromOptions_MPIAIJBACKEND - select the backend (device-capable) symbolic
  product routine for AB, AtB and PtAP products, with an options-database escape hatch
  to force the CPU path.

  Logic:
  - Without device support, the backend is always eligible (match = PETSC_TRUE).
  - With device support, the backend is eligible only when neither operand is bound to
    the CPU and both operands have the same matrix type; the user may still opt out via
    -mat*_backend_cpu options (spelled differently for the API-user vs MatProduct paths).
  - If the backend is not selected, fall back to the plain MPIAIJ product dispatch.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* eligible only if both operands live on the device and have matching types */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* user asked for CPU -> do not use the backend */
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
NULL)); 7672 *n = cnt + 1; 7673 PetscFunctionReturn(PETSC_SUCCESS); 7674 } 7675 7676 /* 7677 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7678 7679 ncollapsed - the number of block indices 7680 collapsed - the block indices (must be large enough to contain the indices) 7681 */ 7682 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7683 { 7684 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7685 7686 PetscFunctionBegin; 7687 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7688 for (i = start + 1; i < start + bs; i++) { 7689 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7690 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7691 cprevtmp = cprev; 7692 cprev = merged; 7693 merged = cprevtmp; 7694 } 7695 *ncollapsed = nprev; 7696 if (collapsed) *collapsed = cprev; 7697 PetscFunctionReturn(PETSC_SUCCESS); 7698 } 7699 7700 /* 7701 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7702 7703 Input Parameter: 7704 . Amat - matrix 7705 - symmetrize - make the result symmetric 7706 + scale - scale with diagonal 7707 7708 Output Parameter: 7709 . 
a_Gmat - output scalar graph >= 0 7710 7711 */ 7712 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7713 { 7714 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7715 MPI_Comm comm; 7716 Mat Gmat; 7717 PetscBool ismpiaij, isseqaij; 7718 Mat a, b, c; 7719 MatType jtype; 7720 7721 PetscFunctionBegin; 7722 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7723 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7724 PetscCall(MatGetSize(Amat, &MM, &NN)); 7725 PetscCall(MatGetBlockSize(Amat, &bs)); 7726 nloc = (Iend - Istart) / bs; 7727 7728 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7729 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7730 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7731 7732 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7733 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7734 implementation */ 7735 if (bs > 1) { 7736 PetscCall(MatGetType(Amat, &jtype)); 7737 PetscCall(MatCreate(comm, &Gmat)); 7738 PetscCall(MatSetType(Gmat, jtype)); 7739 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7740 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7741 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7742 PetscInt *d_nnz, *o_nnz; 7743 MatScalar *aa, val, *AA; 7744 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7745 if (isseqaij) { 7746 a = Amat; 7747 b = NULL; 7748 } else { 7749 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7750 a = d->A; 7751 b = d->B; 7752 } 7753 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7754 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7755 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7756 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7757 const PetscInt *cols1, *cols2; 7758 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7759 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7760 nnz[brow / bs] = nc2 / bs; 7761 if (nc2 % bs) ok = 0; 7762 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7763 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7764 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7765 if (nc1 != nc2) ok = 0; 7766 else { 7767 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7768 if (cols1[jj] != cols2[jj]) ok = 0; 7769 if (cols1[jj] % bs != jj % bs) ok = 0; 7770 } 7771 } 7772 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7773 } 7774 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7775 if (!ok) { 7776 PetscCall(PetscFree2(d_nnz, o_nnz)); 7777 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7778 goto old_bs; 7779 } 7780 } 7781 } 7782 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7783 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7784 PetscCall(PetscFree2(d_nnz, o_nnz)); 7785 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7786 // diag 7787 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7788 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7789 ai = aseq->i; 7790 n = ai[brow + 1] - ai[brow]; 7791 aj = aseq->j + ai[brow]; 7792 for (int k = 0; k < n; k += bs) { // block columns 7793 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7794 val = 0; 7795 if (index_size == 0) { 7796 for (int ii = 0; ii < bs; ii++) { // rows in block 7797 aa = aseq->a + ai[brow + ii] + k; 7798 for (int jj = 0; jj < bs; jj++) { // columns in block 7799 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7800 } 7801 } 7802 } else { // use (index,index) value if provided 7803 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7804 int ii = index[iii]; 7805 aa = aseq->a + ai[brow + ii] + k; 7806 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7807 int jj = index[jjj]; 7808 val = PetscAbs(PetscRealPart(aa[jj])); 7809 } 7810 } 7811 } 7812 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7813 AA[k / bs] = val; 7814 } 7815 grow = Istart / bs + brow / bs; 7816 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7817 } 7818 // off-diag 7819 if (ismpiaij) { 7820 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7821 const PetscScalar *vals; 7822 const PetscInt *cols, *garray = aij->garray; 7823 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7824 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7825 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7826 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7827 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7828 AA[k / bs] = 0; 7829 AJ[cidx] = garray[cols[k]] / bs; 7830 } 7831 nc = ncols / bs; 7832 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7833 if (index_size == 0) { 7834 for (int ii = 0; ii < bs; ii++) { // rows in block 7835 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7836 for (int k = 0; k < ncols; k += bs) { 7837 for (int jj = 0; jj < bs; jj++) { // cols in block 7838 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7839 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7840 } 7841 } 7842 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7843 } 7844 } else { // use (index,index) value if provided 7845 for (int iii = 0; iii < index_size; iii++) { // rows in block 7846 int ii = index[iii]; 7847 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7848 for (int k = 0; k < ncols; k += bs) { 7849 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7850 int jj = index[jjj]; 7851 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7852 } 7853 } 7854 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7855 } 7856 } 7857 grow = Istart / bs + brow / bs; 7858 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7859 } 7860 } 7861 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7862 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7863 PetscCall(PetscFree2(AA, AJ)); 7864 } else { 7865 const PetscScalar *vals; 7866 const PetscInt *idx; 7867 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7868 old_bs: 7869 /* 7870 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7871 */ 7872 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7873 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7874 if (isseqaij) { 7875 PetscInt max_d_nnz; 7876 /* 7877 Determine exact preallocation count for (sequential) scalar matrix 7878 */ 7879 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7880 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7881 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7882 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7883 PetscCall(PetscFree3(w0, w1, w2)); 7884 } else if (ismpiaij) { 7885 Mat Daij, Oaij; 7886 const PetscInt *garray; 7887 PetscInt max_d_nnz; 7888 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7889 /* 7890 Determine exact preallocation count for diagonal block portion of scalar matrix 7891 */ 7892 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7893 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7894 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7895 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7896 PetscCall(PetscFree3(w0, w1, w2)); 7897 /* 7898 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7899 */ 7900 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7901 o_nnz[jj] = 0; 7902 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7903 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7904 o_nnz[jj] += ncols; 7905 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7906 } 7907 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7908 } 7909 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7910 /* get scalar copy (norms) of matrix */ 7911 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7912 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7913 PetscCall(PetscFree2(d_nnz, o_nnz)); 7914 for (Ii = Istart; Ii < Iend; Ii++) { 7915 PetscInt dest_row = Ii / bs; 7916 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7917 for (jj = 0; jj < ncols; jj++) { 7918 PetscInt dest_col = idx[jj] / bs; 7919 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7920 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7921 } 7922 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7923 } 7924 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7925 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7926 } 7927 } else { 7928 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7929 else { 7930 Gmat = Amat; 7931 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7932 } 7933 if (isseqaij) { 7934 a = Gmat; 7935 b = NULL; 7936 } else { 7937 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7938 a = d->A; 7939 b = d->B; 7940 } 7941 if (filter >= 0 || scale) { 7942 /* take absolute value of each entry */ 7943 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7944 MatInfo info; 7945 PetscScalar *avals; 7946 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7947 PetscCall(MatSeqAIJGetArray(c, &avals)); 7948 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 7949 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7950 } 7951 } 7952 } 7953 if (symmetrize) { 7954 PetscBool isset, issym; 7955 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7956 if (!isset || !issym) { 7957 Mat matTrans; 7958 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7959 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7960 PetscCall(MatDestroy(&matTrans)); 7961 } 7962 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7963 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7964 if (scale) { 7965 /* scale c for all diagonal values = 1 or -1 */ 7966 Vec diag; 7967 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7968 PetscCall(MatGetDiagonal(Gmat, diag)); 7969 PetscCall(VecReciprocal(diag)); 7970 PetscCall(VecSqrtAbs(diag)); 7971 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7972 PetscCall(VecDestroy(&diag)); 7973 } 7974 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7975 7976 if (filter >= 0) { 7977 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 7978 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 7979 } 7980 *a_Gmat = Gmat; 7981 PetscFunctionReturn(PETSC_SUCCESS); 7982 } 7983 7984 /* 7985 Special version for direct calls from Fortran 7986 */ 7987 #include <petsc/private/fortranimpl.h> 7988 7989 /* Change these macros so can be used in void function */ 7990 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7991 #undef PetscCall 7992 #define PetscCall(...) \ 7993 do { \ 7994 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7995 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7996 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 7997 return; \ 7998 } \ 7999 } while (0) 8000 8001 #undef SETERRQ 8002 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the platform's Fortran external-name convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable MatSetValues() specialized for MATMPIAIJ.

  All scalar arguments arrive by reference (Fortran calling convention); errors are
  reported through *_ierr and the function returns void, via the PetscCall/SETERRQ
  redefinitions above.

  Rows owned by this process (rstart <= im[i] < rend) are inserted directly into the
  local diagonal (A) or off-diagonal (B) SeqAIJ blocks using the
  MatSetValues_SeqAIJ_{A,B}_Private() macros; rows owned by other processes are queued
  in the matrix stash (unless aij->donotstash) for communication at assembly time.

  NOTE(review): the _Private macros expand in terms of the exact local variable names
  declared below (rp1/ap1/rmax1/..., rp2/ap2/rmax2/..., nonew, inserted, bm, am, N, t,
  _i, ii, lastcol1/2) — none of these may be renamed or removed.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* Lock in the insert mode on first use; mixing ADD_VALUES and INSERT_VALUES is an error */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    /* Zero entries may be dropped only when adding (inserting a zero must overwrite) */
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt    nonew = a->nonew;
    MatScalar  *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative rows are ignored by MatSetValues convention */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: set up binary-search windows into A's and B's row storage */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major when roworiented, column-major otherwise */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* Never drop a diagonal entry, even if zero (im[i] != in[j] guard) */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column in the diagonal block: insert into A with a local column index */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Off-diagonal block */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              /* colmap stores global-column -> (compressed B column + 1); 0 means "not present" */
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* New off-diagonal column after assembly: expand B back to global column indexing */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* before first assembly B uses global column indices */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Off-process row: queue the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ