#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
   MatDestroy_MPIAIJ - Destroys a parallel AIJ matrix.

   Frees the diagonal (A) and off-diagonal (B) sequential blocks, the global-to-local
   column map, the gathered-column array (garray), the local work vector and scatter
   context used for matrix-vector products, and the row-query work arrays; then removes
   every composed method/object so no dangling function pointers outlive the data.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is a hash map with PETSC_USE_CTABLE, otherwise a dense integer array of length cmap->N */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared earlier above; harmless duplicate */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
   MatGetRowIJ_MPIAIJ - Provides the compressed row storage (i,j) arrays by first merging
   the parallel matrix into a single sequential matrix B on each rank.

   The merged matrix is composed on A under "MatGetRowIJ_MPIAIJ" so that
   MatRestoreRowIJ_MPIAIJ() can find it again; the local reference is dropped here,
   leaving the composition as the only owner.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatRestoreRowIJ_MPIAIJ - Companion to MatGetRowIJ_MPIAIJ(); retrieves the composed
   merged matrix, restores its (i,j) arrays, and removes the composition (which
   destroys the merged matrix, since the composition held the last reference).
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
   MatBindToCPU_MPIAIJ - Binds (or unbinds) the matrix to the CPU by forwarding the flag
   to the diagonal and off-diagonal blocks and the associated work vectors.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* The flag on the parent matrix is only meaningful in device builds */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatSetBlockSizes_MPIAIJ - Propagates the row/column block sizes to the diagonal block;
   the off-diagonal block always keeps column block size 1 (its columns are the scattered
   ghost columns).
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatFindNonzeroRows_MPIAIJ - Creates an index set of the locally owned (global) row
   numbers that contain at least one stored nonzero value in either block.

   Sets *keptrows to NULL when every row on every rank is nonzero (the collective
   reduction of the zero-row count n0rows is 0), avoiding the IS allocation entirely.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows; /* cnt counts locally zero rows */
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows whose stored entries are all exactly zero (or empty) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav ? bav + ib[i] : NULL; /* bav may be NULL when B has no entries */
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the m - cnt nonzero rows, shifted to global numbering */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatDiagonalSet_MPIAIJ - Sets/adds the vector D on the diagonal of Y. When the matrix
   is assembled and its row/column layouts are congruent, the diagonal lives entirely in
   the diagonal block so the operation is forwarded there; otherwise falls back to the
   generic implementation.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatFindZeroDiagonals_MPIAIJ - Creates an index set of the locally owned rows with a
   zero (or missing) diagonal entry; the diagonal entries all live in the A block, so
   the sequential helper does the work and the results are shifted to global numbering.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (norms, sums or means
   of real/imaginary parts) over the whole parallel matrix into reductions[], an array
   of global length n on every rank.

   Local contributions are accumulated into work[] indexed by global column (diagonal
   block via cmap->rstart offset, off-diagonal block via garray), then combined with a
   collective MAX (for NORM_INFINITY) or SUM reduction.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* Get/Restore with an unused pointer before touching a_aij->a / b_aij->a directly;
     presumably this forces up-to-date host copies of the value arrays — TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatFindOffBlockDiagonalEntries_MPIAIJ - Creates an index set of the locally owned rows
   that have entries outside the block diagonal: the union of the diagonal block's
   off-block-diagonal rows (sis) and the rows with any off-diagonal-block entry (gis),
   sorted, de-duplicated, and shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array) but is fast to access.
 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
  /* Entries are stored shifted by +1 so that 0 (hash-map default / calloc'ed value)
     means "global column not present in the off-diagonal block"; lookups subtract 1 */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Inserts/adds one value into the diagonal block's row. Relies on many locals of the
   caller (rp1/ap1/nrow1/low1/high1/lastcol1/nonew/ignorezeroentries/...); a binary-ish
   search (bisection while the window is > 5, then linear scan) locates the column, and
   a miss either skips (nonew == 1 or ignorable zero) or reallocates and shifts the row.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
   Off-diagonal-block counterpart of MatSetValues_SeqAIJ_A_Private(); identical logic
   operating on the B-block locals (rp2/ap2/nrow2/...). Note the ignorable-zero test
   here has no "row != col" exclusion — diagonal entries never live in the B block.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
   MatSetValuesRow_MPIAIJ - Overwrites an entire locally owned row (global index row)
   with the values in v, which must be ordered as the row is stored: off-diagonal
   entries left of the diagonal block, then the diagonal block, then off-diagonal
   entries to the right.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatSetValues_MPIAIJ - Inserts/adds a logically dense block of values. Locally owned
   rows are routed to the diagonal (A) or off-diagonal (B) block via the insertion
   macros above; off-process rows are queued in the stash for communication at assembly.

   If a new off-diagonal column appears after the matrix was assembled and B allows new
   nonzeros, the matrix is "disassembled" (B converted back to global column indices)
   and all B-related cached locals are reinitialized before the insertion proceeds.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj ? aj + ai[row] : NULL;
      ap1      = aa ? aa + ai[row] : NULL;
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj ? bj + bi[row] : NULL;
      ap2      = ba ? ba + bi[row] : NULL;
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v ? v + i * n : NULL, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v ? v + i : NULL, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
621 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 622 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 623 */ 624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 625 { 626 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 627 Mat A = aij->A; /* diagonal part of the matrix */ 628 Mat B = aij->B; /* off-diagonal part of the matrix */ 629 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 630 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 631 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 632 PetscInt *ailen = a->ilen, *aj = a->j; 633 PetscInt *bilen = b->ilen, *bj = b->j; 634 PetscInt am = aij->A->rmap->n, j; 635 PetscInt diag_so_far = 0, dnz; 636 PetscInt offd_so_far = 0, onz; 637 638 PetscFunctionBegin; 639 /* Iterate over all rows of the matrix */ 640 for (j = 0; j < am; j++) { 641 dnz = onz = 0; 642 /* Iterate over all non-zero columns of the current row */ 643 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 644 /* If column is in the diagonal */ 645 if (mat_j[col] >= cstart && mat_j[col] < cend) { 646 aj[diag_so_far++] = mat_j[col] - cstart; 647 dnz++; 648 } else { /* off-diagonal entries */ 649 bj[offd_so_far++] = mat_j[col]; 650 onz++; 651 } 652 } 653 ailen[j] = dnz; 654 bilen[j] = onz; 655 } 656 PetscFunctionReturn(PETSC_SUCCESS); 657 } 658 659 /* 660 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 661 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 662 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 663 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 664 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
665 */ 666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 667 { 668 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 669 Mat A = aij->A; /* diagonal part of the matrix */ 670 Mat B = aij->B; /* off-diagonal part of the matrix */ 671 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 672 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 673 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 674 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 675 PetscInt *ailen = a->ilen, *aj = a->j; 676 PetscInt *bilen = b->ilen, *bj = b->j; 677 PetscInt am = aij->A->rmap->n, j; 678 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 679 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 680 PetscScalar *aa = a->a, *ba = b->a; 681 682 PetscFunctionBegin; 683 /* Iterate over all rows of the matrix */ 684 for (j = 0; j < am; j++) { 685 dnz_row = onz_row = 0; 686 rowstart_offd = full_offd_i[j]; 687 rowstart_diag = full_diag_i[j]; 688 /* Iterate over all non-zero columns of the current row */ 689 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 690 /* If column is in the diagonal */ 691 if (mat_j[col] >= cstart && mat_j[col] < cend) { 692 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 693 aa[rowstart_diag + dnz_row] = mat_a[col]; 694 dnz_row++; 695 } else { /* off-diagonal entries */ 696 bj[rowstart_offd + onz_row] = mat_j[col]; 697 ba[rowstart_offd + onz_row] = mat_a[col]; 698 onz_row++; 699 } 700 } 701 ailen[j] = dnz_row; 702 bilen[j] = onz_row; 703 } 704 PetscFunctionReturn(PETSC_SUCCESS); 705 } 706 707 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 708 { 709 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 710 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 711 
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 712 713 PetscFunctionBegin; 714 for (i = 0; i < m; i++) { 715 if (idxm[i] < 0) continue; /* negative row */ 716 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 717 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 718 row = idxm[i] - rstart; 719 for (j = 0; j < n; j++) { 720 if (idxn[j] < 0) continue; /* negative column */ 721 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 722 if (idxn[j] >= cstart && idxn[j] < cend) { 723 col = idxn[j] - cstart; 724 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 725 } else { 726 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 727 #if defined(PETSC_USE_CTABLE) 728 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 729 col--; 730 #else 731 col = aij->colmap[idxn[j]] - 1; 732 #endif 733 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 734 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 735 } 736 } 737 } 738 PetscFunctionReturn(PETSC_SUCCESS); 739 } 740 741 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 744 PetscInt nstash, reallocs; 745 746 PetscFunctionBegin; 747 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 748 749 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 750 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 751 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAssemblyEnd_MPIAIJ - completes assembly: receives stashed off-process entries,
  inserts them, assembles the A and B blocks, handles global disassembly/reassembly
  of the off-diagonal block, and updates the collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain every incoming stash message */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the scatter (lvec/Mvctx) for MatMult etc. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row work arrays / diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatZeroEntries_MPIAIJ - zeroes all stored values in both blocks (pattern kept). */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRows_MPIAIJ - zeroes the given (global) rows, optionally placing diag on
  the diagonal and fixing the right-hand side b from x. (continues below)
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const
PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed: b_i = diag * x_i for each zeroed row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember nonzero states so we can detect pattern changes afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* square/congruent case: diagonal entry lives in the A block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save nonew flags; temporarily allow new nonzeros where the pattern need not be kept */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal position for rows past the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: just zero the rows in both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRowsColumns_MPIAIJ - zeroes the given global rows AND columns, placing
  diag on the diagonal, and (optionally) adjusts b for the known values in x.
  Row ownership is resolved with a PetscSF reduction; the zeroed-column mask is
  communicated to ghost columns via the matrix's Mvctx scatter.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* NOTE: reused later as a per-row nonzero count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over global columns and scatter it to the ghost (lvec) layout */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* move the known value to the rhs before zeroing the entry */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMult_MPIAIJ - y = A*x, overlapping the ghost-value scatter with the
  local (diagonal-block) multiply. (continues below)
*/
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* overlap communication of ghost values with the diagonal-block multiply */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* add the off-diagonal contribution: y += B * lvec */
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultDiagonalBlock_MPIAIJ - applies only the local diagonal block A. */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultAdd_MPIAIJ - z = y + A*x, with the same communication overlap as MatMult. */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMultTranspose_MPIAIJ - y = A^T*x. The off-diagonal transpose product lands in
  lvec and is added into y with a reverse scatter.
*/
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatIsTranspose_MPIAIJ - tests whether Bmat == Amat^T (within tol).
  Cheap test first (diagonal blocks, reduced over all ranks); only if that passes
  is the expensive off-diagonal comparison done via MatCreateSubMatrices.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all column indices outside the locally owned [first, last) range */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* compare A(Me, Notme) against B(Notme, Me) */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatIsSymmetric_MPIAIJ - symmetry check via A == A^T. */
static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTransposeAdd_MPIAIJ - z = y + A^T*x. (continues below) */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatScale_MPIAIJ - scales both blocks by aa. */
static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatView_MPIAIJ_Binary - writes the parallel matrix in PETSc binary format:
  header, per-row lengths, column indices, then values, each via collective
  PetscViewerBinaryWriteAll calls. Within each row, off-diagonal entries with
  global column < cs come first, then the diagonal block, then the remaining
  off-diagonal entries, so columns are emitted in ascending global order.
*/
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray; /* maps B's local column ids to global ids */
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    /* clamp the global nonzero count if it does not fit in PetscInt */
    if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diagonal columns left of the owned range */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal-block columns (shift back to global) */
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* remaining off-diagonal columns right of the owned range */
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same column order as above */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  /* NOTE(review): uses PETSC_ERR_LIB here but PETSC_ERR_PLIB above for the same
     internal-consistency check — looks like an inadvertent inconsistency; confirm upstream */
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - dispatches viewing of the parallel matrix:
  special ASCII formats (load balance, info, factor info) are handled directly;
  binary delegates to MatView_MPIAIJ_Binary; otherwise the whole matrix is
  gathered onto rank 0 and viewed there. (continues below)
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %"
PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local sizes, nonzeros and inode usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPIAIJ - top-level view dispatch for supported viewer types. */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSOR_MPIAIJ - "local" SOR/Eisenstat relaxation: SOR is applied to the diagonal
  block only, with the off-diagonal contribution folded into the right-hand side
  via bb1 = bb - B*x each outer iteration. True parallel SOR is not supported.
  (continues below)
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag,
fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (modified rhs) is needed unless a single zero-initial-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no off-diagonal correction since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* fetch ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward half-sweep from zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal for the pointwise-mult fallback */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization error detected in the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - builds B = P_r * A * P_c for row/column permutations given
  as index sets. Inverse permutations and ghost-column destinations are computed
  with PetscSF communication; the result is assembled with MatSetValues.
  (continues below)
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF   rowsf, sf;
  IS        parcolp = NULL; /* NOTE(review): never set in the visible code, so the ISDestroy(&colp) below looks dead — confirm */
  PetscBool done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros of each permuted row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* forward the counts to the ranks that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetGhosts_MPIAIJ - returns the number of ghost columns (columns of B) and,
  optionally, a borrowed pointer to their global indices (garray). The caller
  must not free *ghosts.
*/
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetInfo_MPIAIJ - accumulates local info of both blocks into isend[].
  NOTE(review): this function continues beyond the end of this chunk
  (the MPI reduction of isend into irecv is not visible here).
*/
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* stage the diagonal block's counters */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* add the off-diagonal block's counters */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
1630 if (flag == MAT_LOCAL) { 1631 info->nz_used = isend[0]; 1632 info->nz_allocated = isend[1]; 1633 info->nz_unneeded = isend[2]; 1634 info->memory = isend[3]; 1635 info->mallocs = isend[4]; 1636 } else if (flag == MAT_GLOBAL_MAX) { 1637 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1638 1639 info->nz_used = irecv[0]; 1640 info->nz_allocated = irecv[1]; 1641 info->nz_unneeded = irecv[2]; 1642 info->memory = irecv[3]; 1643 info->mallocs = irecv[4]; 1644 } else if (flag == MAT_GLOBAL_SUM) { 1645 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1646 1647 info->nz_used = irecv[0]; 1648 info->nz_allocated = irecv[1]; 1649 info->nz_unneeded = irecv[2]; 1650 info->memory = irecv[3]; 1651 info->mallocs = irecv[4]; 1652 } 1653 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1654 info->fill_ratio_needed = 0; 1655 info->factor_mallocs = 0; 1656 PetscFunctionReturn(PETSC_SUCCESS); 1657 } 1658 1659 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1660 { 1661 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1662 1663 PetscFunctionBegin; 1664 switch (op) { 1665 case MAT_NEW_NONZERO_LOCATIONS: 1666 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1667 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1668 case MAT_KEEP_NONZERO_PATTERN: 1669 case MAT_NEW_NONZERO_LOCATION_ERR: 1670 case MAT_USE_INODES: 1671 case MAT_IGNORE_ZERO_ENTRIES: 1672 case MAT_FORM_EXPLICIT_TRANSPOSE: 1673 MatCheckPreallocated(A, 1); 1674 PetscCall(MatSetOption(a->A, op, flg)); 1675 PetscCall(MatSetOption(a->B, op, flg)); 1676 break; 1677 case MAT_ROW_ORIENTED: 1678 MatCheckPreallocated(A, 1); 1679 a->roworiented = flg; 1680 1681 PetscCall(MatSetOption(a->A, op, flg)); 1682 PetscCall(MatSetOption(a->B, op, flg)); 1683 break; 1684 case MAT_FORCE_DIAGONAL_ENTRIES: 1685 case MAT_SORTED_FULL: 1686 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1687 break; 1688 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1689 a->donotstash = flg; 1690 break; 1691 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1692 case MAT_SPD: 1693 case MAT_SYMMETRIC: 1694 case MAT_STRUCTURALLY_SYMMETRIC: 1695 case MAT_HERMITIAN: 1696 case MAT_SYMMETRY_ETERNAL: 1697 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1698 case MAT_SPD_ETERNAL: 1699 /* if the diagonal matrix is square it inherits some of the properties above */ 1700 break; 1701 case MAT_SUBMAT_SINGLEIS: 1702 A->submat_singleis = flg; 1703 break; 1704 case MAT_STRUCTURE_ONLY: 1705 /* The option is handled directly by MatSetOption() */ 1706 break; 1707 default: 1708 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 
1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp, *tmp2; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1840 *norm = 0.0; 1841 v = amata; 1842 jj = amat->j; 1843 for (j = 0; j < amat->nz; j++) { 1844 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1845 v++; 1846 } 1847 v = bmata; 1848 jj = bmat->j; 1849 for (j = 0; j < bmat->nz; j++) { 1850 
tmp[garray[*jj++]] += PetscAbsScalar(*v); 1851 v++; 1852 } 1853 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1854 for (j = 0; j < mat->cmap->N; j++) { 1855 if (tmp2[j] > *norm) *norm = tmp2[j]; 1856 } 1857 PetscCall(PetscFree(tmp)); 1858 PetscCall(PetscFree(tmp2)); 1859 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1860 } else if (type == NORM_INFINITY) { /* max row norm */ 1861 PetscReal ntemp = 0.0; 1862 for (j = 0; j < aij->A->rmap->n; j++) { 1863 v = amata + amat->i[j]; 1864 sum = 0.0; 1865 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1866 sum += PetscAbsScalar(*v); 1867 v++; 1868 } 1869 v = bmata + bmat->i[j]; 1870 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1871 sum += PetscAbsScalar(*v); 1872 v++; 1873 } 1874 if (sum > ntemp) ntemp = sum; 1875 } 1876 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1877 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1878 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1879 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1880 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1881 } 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1886 { 1887 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1888 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1889 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1890 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1891 Mat B, A_diag, *B_diag; 1892 const MatScalar *pbv, *bv; 1893 1894 PetscFunctionBegin; 1895 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1896 ma = A->rmap->n; 1897 na = A->cmap->n; 1898 mb = a->B->rmap->n; 1899 nb = a->B->cmap->n; 
1900 ai = Aloc->i; 1901 aj = Aloc->j; 1902 bi = Bloc->i; 1903 bj = Bloc->j; 1904 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1905 PetscInt *d_nnz, *g_nnz, *o_nnz; 1906 PetscSFNode *oloc; 1907 PETSC_UNUSED PetscSF sf; 1908 1909 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1910 /* compute d_nnz for preallocation */ 1911 PetscCall(PetscArrayzero(d_nnz, na)); 1912 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1913 /* compute local off-diagonal contributions */ 1914 PetscCall(PetscArrayzero(g_nnz, nb)); 1915 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1916 /* map those to global */ 1917 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1918 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1919 PetscCall(PetscSFSetFromOptions(sf)); 1920 PetscCall(PetscArrayzero(o_nnz, na)); 1921 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1922 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1923 PetscCall(PetscSFDestroy(&sf)); 1924 1925 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1926 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1927 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1928 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1929 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1930 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1931 } else { 1932 B = *matout; 1933 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1934 } 1935 1936 b = (Mat_MPIAIJ *)B->data; 1937 A_diag = a->A; 1938 B_diag = &b->A; 1939 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1940 A_diag_ncol = A_diag->cmap->N; 1941 B_diag_ilen = sub_B_diag->ilen; 1942 B_diag_i = sub_B_diag->i; 1943 1944 /* Set ilen for diagonal of B */ 1945 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1946 1947 /* Transpose the diagonal part of the matrix. 
In contrast to the off-diagonal part, this can be done 1948 very quickly (=without using MatSetValues), because all writes are local. */ 1949 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1950 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1951 1952 /* copy over the B part */ 1953 PetscCall(PetscMalloc1(bi[mb], &cols)); 1954 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1955 pbv = bv; 1956 row = A->rmap->rstart; 1957 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1958 cols_tmp = cols; 1959 for (i = 0; i < mb; i++) { 1960 ncol = bi[i + 1] - bi[i]; 1961 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1962 row++; 1963 if (pbv) pbv += ncol; 1964 if (cols_tmp) cols_tmp += ncol; 1965 } 1966 PetscCall(PetscFree(cols)); 1967 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1968 1969 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1970 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1971 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1972 *matout = B; 1973 } else { 1974 PetscCall(MatHeaderMerge(A, &B)); 1975 } 1976 PetscFunctionReturn(PETSC_SUCCESS); 1977 } 1978 1979 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1980 { 1981 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1982 Mat a = aij->A, b = aij->B; 1983 PetscInt s1, s2, s3; 1984 1985 PetscFunctionBegin; 1986 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1987 if (rr) { 1988 PetscCall(VecGetLocalSize(rr, &s1)); 1989 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1990 /* Overlap communication with computation. 
*/ 1991 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1992 } 1993 if (ll) { 1994 PetscCall(VecGetLocalSize(ll, &s1)); 1995 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1996 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1997 } 1998 /* scale the diagonal block */ 1999 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2000 2001 if (rr) { 2002 /* Do a scatter end and then right scale the off-diagonal block */ 2003 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2004 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2005 } 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 2009 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2010 { 2011 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2012 2013 PetscFunctionBegin; 2014 PetscCall(MatSetUnfactored(a->A)); 2015 PetscFunctionReturn(PETSC_SUCCESS); 2016 } 2017 2018 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2019 { 2020 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2021 Mat a, b, c, d; 2022 PetscBool flg; 2023 2024 PetscFunctionBegin; 2025 a = matA->A; 2026 b = matA->B; 2027 c = matB->A; 2028 d = matB->B; 2029 2030 PetscCall(MatEqual(a, c, &flg)); 2031 if (flg) PetscCall(MatEqual(b, d, &flg)); 2032 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2033 PetscFunctionReturn(PETSC_SUCCESS); 2034 } 2035 2036 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2037 { 2038 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2039 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2040 2041 PetscFunctionBegin; 2042 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2043 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2044 /* because of the column compression in the off-processor part of the matrix a->B, 2045 the number of columns in a->B and b->B may be different, hence we cannot call 2046 the MatCopy() directly on the two parts. If need be, we can provide a more 2047 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2048 then copying the submatrices */ 2049 PetscCall(MatCopy_Basic(A, B, str)); 2050 } else { 2051 PetscCall(MatCopy(a->A, b->A, str)); 2052 PetscCall(MatCopy(a->B, b->B, str)); 2053 } 2054 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2055 PetscFunctionReturn(PETSC_SUCCESS); 2056 } 2057 2058 /* 2059 Computes the number of nonzeros per row needed for preallocation when X and Y 2060 have different nonzero structure. 2061 */ 2062 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2063 { 2064 PetscInt i, j, k, nzx, nzy; 2065 2066 PetscFunctionBegin; 2067 /* Set the number of nonzeros in the new matrix */ 2068 for (i = 0; i < m; i++) { 2069 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2070 nzx = xi[i + 1] - xi[i]; 2071 nzy = yi[i + 1] - yi[i]; 2072 nnz[i] = 0; 2073 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2074 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2075 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2076 nnz[i]++; 2077 } 2078 for (; k < nzy; k++) nnz[i]++; 2079 } 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2084 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2085 { 2086 PetscInt m = Y->rmap->N; 2087 Mat_SeqAIJ *x = 
(Mat_SeqAIJ *)X->data; 2088 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2089 2090 PetscFunctionBegin; 2091 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2092 PetscFunctionReturn(PETSC_SUCCESS); 2093 } 2094 2095 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2096 { 2097 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2098 2099 PetscFunctionBegin; 2100 if (str == SAME_NONZERO_PATTERN) { 2101 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2102 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2103 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2104 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2105 } else { 2106 Mat B; 2107 PetscInt *nnz_d, *nnz_o; 2108 2109 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2110 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2111 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2112 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2113 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2114 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2115 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2116 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2117 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2118 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2119 PetscCall(MatHeaderMerge(Y, &B)); 2120 PetscCall(PetscFree(nnz_d)); 2121 PetscCall(PetscFree(nnz_o)); 2122 } 2123 PetscFunctionReturn(PETSC_SUCCESS); 2124 } 2125 2126 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2127 2128 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2129 { 2130 PetscFunctionBegin; 2131 if (PetscDefined(USE_COMPLEX)) { 2132 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2133 2134 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2135 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2136 } 2137 PetscFunctionReturn(PETSC_SUCCESS); 2138 } 2139 2140 
static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2143 2144 PetscFunctionBegin; 2145 PetscCall(MatRealPart(a->A)); 2146 PetscCall(MatRealPart(a->B)); 2147 PetscFunctionReturn(PETSC_SUCCESS); 2148 } 2149 2150 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatImaginaryPart(a->A)); 2156 PetscCall(MatImaginaryPart(a->B)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2163 PetscInt i, *idxb = NULL, m = A->rmap->n; 2164 PetscScalar *va, *vv; 2165 Vec vB, vA; 2166 const PetscScalar *vb; 2167 2168 PetscFunctionBegin; 2169 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2170 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2171 2172 PetscCall(VecGetArrayWrite(vA, &va)); 2173 if (idx) { 2174 for (i = 0; i < m; i++) { 2175 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2176 } 2177 } 2178 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2180 PetscCall(PetscMalloc1(m, &idxb)); 2181 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2182 2183 PetscCall(VecGetArrayWrite(v, &vv)); 2184 PetscCall(VecGetArrayRead(vB, &vb)); 2185 for (i = 0; i < m; i++) { 2186 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2187 vv[i] = vb[i]; 2188 if (idx) idx[i] = a->garray[idxb[i]]; 2189 } else { 2190 vv[i] = va[i]; 2191 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2192 } 2193 } 2194 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2195 PetscCall(VecRestoreArrayWrite(vA, &va)); 2196 PetscCall(VecRestoreArrayRead(vB, &vb)); 2197 PetscCall(PetscFree(idxb)); 2198 PetscCall(VecDestroy(&vA)); 2199 PetscCall(VecDestroy(&vB)); 2200 PetscFunctionReturn(PETSC_SUCCESS); 2201 } 2202 2203 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat 
A, Vec v, PetscInt idx[]) 2204 { 2205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2206 PetscInt m = A->rmap->n, n = A->cmap->n; 2207 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2208 PetscInt *cmap = mat->garray; 2209 PetscInt *diagIdx, *offdiagIdx; 2210 Vec diagV, offdiagV; 2211 PetscScalar *a, *diagA, *offdiagA; 2212 const PetscScalar *ba, *bav; 2213 PetscInt r, j, col, ncols, *bi, *bj; 2214 Mat B = mat->B; 2215 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2216 2217 PetscFunctionBegin; 2218 /* When a process holds entire A and other processes have no entry */ 2219 if (A->cmap->N == n) { 2220 PetscCall(VecGetArrayWrite(v, &diagA)); 2221 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2222 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2223 PetscCall(VecDestroy(&diagV)); 2224 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } else if (n == 0) { 2227 if (m) { 2228 PetscCall(VecGetArrayWrite(v, &a)); 2229 for (r = 0; r < m; r++) { 2230 a[r] = 0.0; 2231 if (idx) idx[r] = -1; 2232 } 2233 PetscCall(VecRestoreArrayWrite(v, &a)); 2234 } 2235 PetscFunctionReturn(PETSC_SUCCESS); 2236 } 2237 2238 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2239 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2240 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2241 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2242 2243 /* Get offdiagIdx[] for implicit 0.0 */ 2244 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2245 ba = bav; 2246 bi = b->i; 2247 bj = b->j; 2248 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2249 for (r = 0; r < m; r++) { 2250 ncols = bi[r + 1] - bi[r]; 2251 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2252 offdiagA[r] = *ba; 2253 offdiagIdx[r] = cmap[0]; 2254 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2255 offdiagA[r] = 0.0; 2256 2257 /* Find first hole in the cmap */ 2258 for (j = 0; j < ncols; j++) { 2259 col = cmap[bj[j]]; /* global column 
number = cmap[B column number] */ 2260 if (col > j && j < cstart) { 2261 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2262 break; 2263 } else if (col > j + n && j >= cstart) { 2264 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2265 break; 2266 } 2267 } 2268 if (j == ncols && ncols < A->cmap->N - n) { 2269 /* a hole is outside compressed Bcols */ 2270 if (ncols == 0) { 2271 if (cstart) { 2272 offdiagIdx[r] = 0; 2273 } else offdiagIdx[r] = cend; 2274 } else { /* ncols > 0 */ 2275 offdiagIdx[r] = cmap[ncols - 1] + 1; 2276 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2277 } 2278 } 2279 } 2280 2281 for (j = 0; j < ncols; j++) { 2282 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2283 offdiagA[r] = *ba; 2284 offdiagIdx[r] = cmap[*bj]; 2285 } 2286 ba++; 2287 bj++; 2288 } 2289 } 2290 2291 PetscCall(VecGetArrayWrite(v, &a)); 2292 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2293 for (r = 0; r < m; ++r) { 2294 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2295 a[r] = diagA[r]; 2296 if (idx) idx[r] = cstart + diagIdx[r]; 2297 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) { 2300 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2301 idx[r] = cstart + diagIdx[r]; 2302 } else idx[r] = offdiagIdx[r]; 2303 } 2304 } else { 2305 a[r] = offdiagA[r]; 2306 if (idx) idx[r] = offdiagIdx[r]; 2307 } 2308 } 2309 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2310 PetscCall(VecRestoreArrayWrite(v, &a)); 2311 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2312 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2313 PetscCall(VecDestroy(&diagV)); 2314 PetscCall(VecDestroy(&offdiagV)); 2315 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2316 PetscFunctionReturn(PETSC_SUCCESS); 2317 } 2318 2319 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2320 { 2321 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2322 
PetscInt m = A->rmap->n, n = A->cmap->n; 2323 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2324 PetscInt *cmap = mat->garray; 2325 PetscInt *diagIdx, *offdiagIdx; 2326 Vec diagV, offdiagV; 2327 PetscScalar *a, *diagA, *offdiagA; 2328 const PetscScalar *ba, *bav; 2329 PetscInt r, j, col, ncols, *bi, *bj; 2330 Mat B = mat->B; 2331 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2332 2333 PetscFunctionBegin; 2334 /* When a process holds entire A and other processes have no entry */ 2335 if (A->cmap->N == n) { 2336 PetscCall(VecGetArrayWrite(v, &diagA)); 2337 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2338 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2339 PetscCall(VecDestroy(&diagV)); 2340 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2341 PetscFunctionReturn(PETSC_SUCCESS); 2342 } else if (n == 0) { 2343 if (m) { 2344 PetscCall(VecGetArrayWrite(v, &a)); 2345 for (r = 0; r < m; r++) { 2346 a[r] = PETSC_MAX_REAL; 2347 if (idx) idx[r] = -1; 2348 } 2349 PetscCall(VecRestoreArrayWrite(v, &a)); 2350 } 2351 PetscFunctionReturn(PETSC_SUCCESS); 2352 } 2353 2354 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r + 1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; 2369 offdiagIdx[r] = cmap[0]; 2370 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2371 offdiagA[r] = 0.0; 2372 2373 /* Find first hole in the cmap */ 2374 for (j = 0; j < ncols; j++) { 2375 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2376 if (col > j && j < cstart) { 2377 
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit-zero candidate against every stored entry of this B row */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block candidates; here the smaller value wins
     (tail of the row-min routine whose head is above this chunk), ties prefer the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMax_MPIAIJ - for each locally owned row, return in v the entry with the largest
  real part over the entire parallel row, i.e. over the diagonal block A, the stored
  entries of the off-diagonal block B, AND B's implicit zeros; idx[] (optional) receives
  the global column number of that entry.
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt m = A->rmap->n, n = A->cmap->n;
  PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt *cmap = mat->garray;
  PetscInt *diagIdx, *offdiagIdx;
  Vec diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt r, j, col, ncols, *bi, *bj;
  Mat B = mat->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* this rank owns all columns: wrap v's array so the sequential kernel writes straight into it */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no locally owned columns: every row is empty here, report the identity of max */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* general case: compute per-row maxima of A and of B separately, then merge */
  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit-zero candidate against every stored entry of this B row */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge: larger value wins, ties prefer the smaller global column number */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetSeqNonzeroStructure_MPIAIJ - build a sequential matrix holding the nonzero
  structure of the whole parallel matrix (values are not copied: MAT_DO_NOT_GET_VALUES).
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the point-block diagonal: only the diagonal block a->A contributes; any
   factorization error it records is propagated to the parallel matrix. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill both sequential blocks with random values and reassemble. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* not yet assembled: skip the locally owned column range so B gets no entries that belong in A */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select between the scalable and the default MatIncreaseOverlap implementation. */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* local nnz = nnz(diagonal block) + nnz(off-diagonal block), read from the CSR row pointers */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* show the currently selected implementation as the option's default */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatShift_MPIAIJ - add a*I; first guarantees some preallocation exists so the
   diagonal insertions in MatShift_Basic() cannot fail. */
static
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2659 { 2660 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2661 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2662 2663 PetscFunctionBegin; 2664 if (!Y->preallocated) { 2665 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2666 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2667 PetscInt nonew = aij->nonew; 2668 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2669 aij->nonew = nonew; 2670 } 2671 PetscCall(MatShift_Basic(Y, a)); 2672 PetscFunctionReturn(PETSC_SUCCESS); 2673 } 2674 2675 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2676 { 2677 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2678 2679 PetscFunctionBegin; 2680 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2681 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2682 if (d) { 2683 PetscInt rstart; 2684 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2685 *d += rstart; 2686 } 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2696 PetscFunctionReturn(PETSC_SUCCESS); 2697 } 2698 2699 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2700 { 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2702 2703 PetscFunctionBegin; 2704 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2705 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2706 PetscFunctionReturn(PETSC_SUCCESS); 2707 } 2708 2709 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2710 MatGetRow_MPIAIJ, 2711 
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};

/* Stash a copy of the current numerical values of both sequential blocks (see MatStoreValues()). */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously stashed by MatStoreValues_MPIAIJ(). */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* (Re)create and preallocate the diagonal (A) and off-diagonal (B) sequential blocks;
   any previously built column map, ghost vector and scatter are discarded. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leaving hash-based insertion mode: restore the cached ops table */
    B->ops[0] = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* drop any stale off-process machinery; it is rebuilt later as needed */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  /* off-diagonal block: with a single rank there are no off-process columns, so use 0 columns */
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* diagonal block: locally owned rows x locally owned columns */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset both sequential blocks to their preallocated-but-unassembled state,
   discarding the column map, ghost vector and scatter. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate layouts, state flags, colmap/garray and both sequential blocks of matin. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-matrix MatGetRow() scratch is not copied; it is rebuilt on demand */
  a->rowindices = NULL;
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the
     matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch matrix loading according to the viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIAIJ matrix from a PETSc binary viewer: header, per-row lengths,
   then column indices and values, handed to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M = header[1];
  N = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; /* prefix sum: row lengths -> CSR row offsets */
  /* nz == PETSC_MAX_INT presumably marks "total count not recorded"; only then is the
     cross-rank consistency check skipped — TODO confirm against the binary writer */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS iscol_local;
  PetscBool isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* fast path only if EVERY rank's iscol is exactly its owned column range (MPI_MIN) */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec x, cmap;
  const PetscInt *is_idx;
  PetscScalar *xarray, *cmaparray;
  PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data;
  Mat B = a->B;
  Vec lvec = a->lvec, lcmap;
  PetscInt i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm comm;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* exclusive prefix sum: this rank's starting offset within the concatenated iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */
    idx[i] = is_idx[i] - cstart;                 /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx,
                            cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  /* ghost entries still holding the -1 padding were NOT selected by iscol */
  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i;                                       /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat M = NULL;
  MPI_Comm comm;
  IS iscol_d, isrow_d, iscol_o;
  Mat Asub = NULL, Bsub = NULL;
  PetscInt n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        /* advance j to the matching global column; subgarray must be a subset of garray */
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d,
     iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatCreateSubMatrix() entry point for MATMPIAIJ: picks an implementation based on whether
   isrow/iscol follow the parallel layout of mat, to avoid the nonscalable path that gathers
   iscol onto every process (see comment below). The collective decision is made with an
   MPI_LAND allreduce so all ranks take the same branch. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Reuse: detect which implementation created *newmat from the IS objects composed on it
       ("isrow_d" => SameRowColDist, "SubIScol" => SameRowDist, otherwise the general path) */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat: all local indices
       must fall inside this rank's ownership range (an empty IS trivially matches) */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* Collective agreement: only use the fast paths if EVERY rank's indices are local */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* not sorted: fall through to the general (nonscalable) path below,
           reusing the iscol_local already gathered here */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the new matrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. A - "diagonal" portion of matrix
. B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  /* A and B must have the same number of local rows */
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat (sum of local diagonal-block widths over all ranks) */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* We bypass MatMPIAIJSetPreallocation(); the diagonal/off-diagonal blocks are installed directly below */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat (ownership of A transfers to *mat) */
  maij->A = A;

  /* Map B's column indices from local (compressed) numbering to global numbering using garray;
     this is done in place in B's j array, which Bnew will share */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer array ownership from B to Bnew: B must not free the shared arrays on destroy */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; MAT_NO_OFF_PROC_ENTRIES avoids any stash communication during this local-only assembly */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *);

/* Scalable submatrix extraction for MATMPIAIJ when isrow follows the row layout of mat.
   iscol_local is the sequential gather of iscol (must be sorted, may contain duplicates);
   it is required for MAT_INITIAL_MATRIX and may be NULL on reuse (the needed IS objects
   are then recovered from the ones composed on *newmat). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the work objects saved by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0; /* cursor into garray; valid because both iscol_local and garray are sorted */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: keep j only if it is one of this rank's off-diagonal columns */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap)); /* cmap maps Msub's local columns to columns of the new global matrix */

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation; olens points at the second half */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate to global column numbering */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),
"ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns;
     decided collectively so all ranks agree */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* The local sequential submatrix was saved on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation; olens points at the second half */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as
request"); 3832 PetscCall(MatZeroEntries(M)); 3833 /* 3834 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3835 rather than the slower MatSetValues(). 3836 */ 3837 M->was_assembled = PETSC_TRUE; 3838 M->assembled = PETSC_FALSE; 3839 } 3840 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3841 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3842 ii = aij->i; 3843 jj = aij->j; 3844 3845 /* trigger copy to CPU if needed */ 3846 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3847 for (i = 0; i < m; i++) { 3848 row = rstart + i; 3849 nz = ii[i + 1] - ii[i]; 3850 cwork = jj; 3851 jj += nz; 3852 vwork = aa; 3853 aa += nz; 3854 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3855 } 3856 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3857 3858 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3859 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3860 *newmat = M; 3861 3862 /* save submatrix used in processor for next request */ 3863 if (call == MAT_INITIAL_MATRIX) { 3864 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3865 PetscCall(MatDestroy(&Mreuse)); 3866 } 3867 PetscFunctionReturn(PETSC_SUCCESS); 3868 } 3869 3870 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3871 { 3872 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3873 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3874 const PetscInt *JJ; 3875 PetscBool nooffprocentries; 3876 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3877 3878 PetscFunctionBegin; 3879 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3880 3881 PetscCall(PetscLayoutSetUp(B->rmap)); 3882 PetscCall(PetscLayoutSetUp(B->cmap)); 3883 m = B->rmap->n; 3884 cstart = B->cmap->rstart; 3885 cend = B->cmap->rend; 3886 rstart = B->rmap->rstart; 3887 3888 PetscCall(PetscCalloc2(m, 
&d_nnz, m, &o_nnz)); 3889 3890 if (PetscDefined(USE_DEBUG)) { 3891 for (i = 0; i < m; i++) { 3892 nnz = Ii[i + 1] - Ii[i]; 3893 JJ = J ? J + Ii[i] : NULL; 3894 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3895 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3896 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3897 } 3898 } 3899 3900 for (i = 0; i < m; i++) { 3901 nnz = Ii[i + 1] - Ii[i]; 3902 JJ = J ? J + Ii[i] : NULL; 3903 nnz_max = PetscMax(nnz_max, nnz); 3904 d = 0; 3905 for (j = 0; j < nnz; j++) { 3906 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3907 } 3908 d_nnz[i] = d; 3909 o_nnz[i] = nnz - d; 3910 } 3911 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3912 PetscCall(PetscFree2(d_nnz, o_nnz)); 3913 3914 for (i = 0; i < m; i++) { 3915 ii = i + rstart; 3916 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J ? J + Ii[i] : NULL, v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3917 } 3918 nooffprocentries = B->nooffprocentries; 3919 B->nooffprocentries = PETSC_TRUE; 3920 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3921 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3922 B->nooffprocentries = nooffprocentries; 3923 3924 /* count number of entries below block diagonal */ 3925 PetscCall(PetscFree(Aij->ld)); 3926 PetscCall(PetscCalloc1(m, &ld)); 3927 Aij->ld = ld; 3928 for (i = 0; i < m; i++) { 3929 nnz = Ii[i + 1] - Ii[i]; 3930 j = 0; 3931 while (j < nnz && J[j] < cstart) j++; 3932 ld[i] = j; 3933 if (J) J += nnz; 3934 } 3935 3936 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3937 PetscFunctionReturn(PETSC_SUCCESS); 3938 } 3939 3940 /*@ 3941 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3942 (the default parallel PETSc format). 3943 3944 Collective 3945 3946 Input Parameters: 3947 + B - the matrix 3948 . i - the indices into j for the start of each local row (starts with zero) 3949 . j - the column indices for each local row (starts with zero) 3950 - v - optional values in the matrix 3951 3952 Level: developer 3953 3954 Notes: 3955 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3956 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3957 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3958 3959 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3960 3961 The format which is used for the sparse matrix input, is equivalent to a 3962 row-major ordering.. 
i.e for the following matrix, the input data expected is
  as shown

.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

  Process0 [P0] rows_owned=[0,1]
    i =  {0,1,3}  [size = nrow+1 = 2+1]
    j =  {0,0,2}  [size = 3]
    v =  {1,2,3}  [size = 3]

  Process1 [P1] rows_owned=[2]
    i =  {0,3}    [size = nrow+1 = 1+1]
    j =  {0,1,2}  [size = 3]
    v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on B (no-op for types without one) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
   (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
   DIAGONAL portion of the local submatrix (possibly different for each row)
   or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
   The size of this array is equal to the number of local rows, i.e 'm'.
   For matrices that will be factored, you must leave room for (and set)
   the diagonal entry even if it is zero.
.
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
   submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
   OFF-DIAGONAL portion of the local submatrix (possibly different for
   each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
   structure. The size of this array is equal to the number
   of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation composed on B (no-op for types without one) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4130 4131 Collective 4132 4133 Input Parameters: 4134 + comm - MPI communicator 4135 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4136 . n - This value should be the same as the local size used in creating the 4137 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4138 calculated if N is given) For square matrices n is almost always m. 4139 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4140 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4141 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4142 . j - column indices 4143 - a - optional matrix values 4144 4145 Output Parameter: 4146 . mat - the matrix 4147 4148 Level: intermediate 4149 4150 Notes: 4151 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4152 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4153 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4154 4155 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4156 4157 The format which is used for the sparse matrix input, is equivalent to a 4158 row-major ordering.. 
i.e. for the following matrix, the input data expected is
  as shown

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL (empty local part); when given, CSR row pointers must be zero-based */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* the CSR arrays are copied into the internal storage here; the caller keeps ownership */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
  rows in standard CSR format. Only the numerical values are updated; the other arrays must be
  identical to what was passed from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
.
m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J - column indices
- v - matrix values

  Level: deprecated

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block */
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;  /* row pointers of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* ld[i]: number of off-diagonal entries of row i that precede the diagonal block (presumably assumes column-sorted CSR input — see the copy pattern below) */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* row i of v is laid out [off-diag left of diag block | diag block | off-diag right];
       split it into the off-diagonal (ao) and diagonal (ad) value arrays */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* number of diagonal-block entries in row i */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* all values were supplied locally, so assembly needs no communication */
  /* NOTE(review): ad/ao have been advanced past the start of the arrays at this point;
     this appears to rely on MatSeqAIJRestoreArrayWrite() ignoring the pointer value — confirm */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v - matrix values, stored by row

  Level: intermediate

  Note:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block */
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data; /* off-diagonal block */
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* row pointers; note Adj indexes the off-diagonal block B despite the name */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* ld[i]: number of off-diagonal entries of row i preceding the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset into v; row sizes come from the stored nonzero structure rather than a user CSR */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    /* same [left-B | A | right-B] split as in MatUpdateMPIAIJWithArrays() */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.
n - This value should be the same as the local size used in creating the 4338 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4339 calculated if N is given) For square matrices n is almost always m. 4340 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4341 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4342 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4343 (same value is used for all local rows) 4344 . d_nnz - array containing the number of nonzeros in the various rows of the 4345 DIAGONAL portion of the local submatrix (possibly different for each row) 4346 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4347 The size of this array is equal to the number of local rows, i.e 'm'. 4348 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4349 submatrix (same value is used for all local rows). 4350 - o_nnz - array containing the number of nonzeros in the various rows of the 4351 OFF-DIAGONAL portion of the local submatrix (possibly different for 4352 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4353 structure. The size of this array is equal to the number 4354 of local rows, i.e 'm'. 4355 4356 Output Parameter: 4357 . A - the matrix 4358 4359 Options Database Keys: 4360 + -mat_no_inode - Do not use inodes 4361 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4362 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4363 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4364 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 
4365 4366 Level: intermediate 4367 4368 Notes: 4369 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4370 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4371 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4372 4373 If the *_nnz parameter is given then the *_nz parameter is ignored 4374 4375 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4376 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4377 storage requirements for this matrix. 4378 4379 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4380 processor than it must be used on all processors that share the object for 4381 that argument. 4382 4383 The user MUST specify either the local or global matrix dimensions 4384 (possibly both). 4385 4386 The parallel matrix is partitioned across processors such that the 4387 first m0 rows belong to process 0, the next m1 rows belong to 4388 process 1, the next m2 rows belong to process 2 etc.. where 4389 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4390 values corresponding to [m x N] submatrix. 4391 4392 The columns are logically partitioned with the n0 columns belonging 4393 to 0th partition, the next n1 columns belonging to the next 4394 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4395 4396 The DIAGONAL portion of the local submatrix on any given processor 4397 is the submatrix corresponding to the rows and columns m,n 4398 corresponding to the given processor. i.e diagonal matrix on 4399 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4400 etc. The remaining portion of the local submatrix [m x (N-n)] 4401 constitute the OFF-DIAGONAL portion. The example below better 4402 illustrates this concept. 
4403 4404 For a square global matrix we define each processor's diagonal portion 4405 to be its local rows and the corresponding columns (a square submatrix); 4406 each processor's off-diagonal portion encompasses the remainder of the 4407 local matrix (a rectangular submatrix). 4408 4409 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4410 4411 When calling this routine with a single process communicator, a matrix of 4412 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4413 type of communicator, use the construction mechanism 4414 .vb 4415 MatCreate(..., &A); 4416 MatSetType(A, MATMPIAIJ); 4417 MatSetSizes(A, m, n, M, N); 4418 MatMPIAIJSetPreallocation(A, ...); 4419 .ve 4420 4421 By default, this format uses inodes (identical nodes) when possible. 4422 We search for consecutive rows with the same nonzero structure, thereby 4423 reusing matrix information to achieve increased efficiency. 4424 4425 Example Usage: 4426 Consider the following 8x8 matrix with 34 non-zero values, that is 4427 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4428 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4429 as follows 4430 4431 .vb 4432 1 2 0 | 0 3 0 | 0 4 4433 Proc0 0 5 6 | 7 0 0 | 8 0 4434 9 0 10 | 11 0 0 | 12 0 4435 ------------------------------------- 4436 13 0 14 | 15 16 17 | 0 0 4437 Proc1 0 18 0 | 19 20 21 | 0 0 4438 0 0 0 | 22 23 0 | 24 0 4439 ------------------------------------- 4440 Proc2 25 26 27 | 0 0 28 | 29 0 4441 30 0 0 | 31 32 33 | 0 34 4442 .ve 4443 4444 This can be represented as a collection of submatrices as 4445 4446 .vb 4447 A B C 4448 D E F 4449 G H I 4450 .ve 4451 4452 Where the submatrices A,B,C are owned by proc0, D,E,F are 4453 owned by proc1, G,H,I are owned by proc2. 4454 4455 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4456 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.

   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
   allocated for every row of the local diagonal submatrix, and `o_nz`
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
4492 4493 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4494 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4495 @*/ 4496 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4497 { 4498 PetscMPIInt size; 4499 4500 PetscFunctionBegin; 4501 PetscCall(MatCreate(comm, A)); 4502 PetscCall(MatSetSizes(*A, m, n, M, N)); 4503 PetscCallMPI(MPI_Comm_size(comm, &size)); 4504 if (size > 1) { 4505 PetscCall(MatSetType(*A, MATMPIAIJ)); 4506 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4507 } else { 4508 PetscCall(MatSetType(*A, MATSEQAIJ)); 4509 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4510 } 4511 PetscFunctionReturn(PETSC_SUCCESS); 4512 } 4513 4514 /*MC 4515 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4516 4517 Synopsis: 4518 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4519 4520 Not Collective 4521 4522 Input Parameter: 4523 . A - the `MATMPIAIJ` matrix 4524 4525 Output Parameters: 4526 + Ad - the diagonal portion of the matrix 4527 . Ao - the off-diagonal portion of the matrix 4528 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4529 - ierr - error code 4530 4531 Level: advanced 4532 4533 Note: 4534 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4535 4536 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4537 M*/ 4538 4539 /*MC 4540 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4541 4542 Synopsis: 4543 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4544 4545 Not Collective 4546 4547 Input Parameters: 4548 + A - the `MATMPIAIJ` matrix 4549 . Ad - the diagonal portion of the matrix 4550 . Ao - the off-diagonal portion of the matrix 4551 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4552 - ierr - error code 4553 4554 Level: advanced 4555 4556 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4557 M*/ 4558 4559 /*@C 4560 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4561 4562 Not Collective 4563 4564 Input Parameter: 4565 . A - The `MATMPIAIJ` matrix 4566 4567 Output Parameters: 4568 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4569 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4570 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4571 4572 Level: intermediate 4573 4574 Note: 4575 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4576 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4577 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these
  local column numbers to global column numbers in the original matrix.

  Fortran Notes:
  `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  /* prefix match so derived MPIAIJ types (e.g. device variants whose type name begins with mpiaij) are accepted */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
  /* borrowed references: the caller must not destroy Ad/Ao or free colmap */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stacks the per-process sequential matrices inmat on top of each other (in rank order)
   into one parallel matrix; n is the local column size of the result (or PETSC_DECIDE). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of the local row counts gives this process's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocation calls are made; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Container destructor for the Mat_Merge_SeqsToMPI support structure attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(); frees all of its buffers. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of merging per-process sequential matrices into the parallel matrix mpimat:
   the nonzero pattern (and the communication plan in the attached Mat_Merge_SeqsToMPI
   container) was set up by MatCreateMPIAIJSumSeqAIJSymbolic(); here only values are
   exchanged and summed into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge plan created by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    /* send the values of all local rows owned by [proc], contiguous in the CSR value array */
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: both bj_i and aj are sorted, and aj's columns are a subset of bj_i's */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: determines the nonzero structure of the merged parallel matrix and
   builds the communication plan (Mat_Merge_SeqsToMPI) that the numeric phase reuses. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4828 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4829 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4830 PetscCall(PetscMalloc1(size, &len_si)); 4831 PetscCall(PetscMalloc1(size, &merge->len_s)); 4832 4833 m = merge->rowmap->n; 4834 owners = merge->rowmap->range; 4835 4836 /* determine the number of messages to send, their lengths */ 4837 len_s = merge->len_s; 4838 4839 len = 0; /* length of buf_si[] */ 4840 merge->nsend = 0; 4841 for (proc = 0; proc < size; proc++) { 4842 len_si[proc] = 0; 4843 if (proc == rank) { 4844 len_s[proc] = 0; 4845 } else { 4846 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4847 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4848 } 4849 if (len_s[proc]) { 4850 merge->nsend++; 4851 nrows = 0; 4852 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4853 if (ai[i + 1] > ai[i]) nrows++; 4854 } 4855 len_si[proc] = 2 * (nrows + 1); 4856 len += len_si[proc]; 4857 } 4858 } 4859 4860 /* determine the number and length of messages to receive for ij-structure */ 4861 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4862 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4863 4864 /* post the Irecv of j-structure */ 4865 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4866 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4867 4868 /* post the Isend of j-structure */ 4869 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4870 4871 for (proc = 0, k = 0; proc < size; proc++) { 4872 if (!len_s[proc]) continue; 4873 i = owners[proc]; 4874 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4875 k++; 4876 } 4877 4878 /* receives and sends of j-structure are complete */ 4879 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, 
rj_waits, status)); 4880 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4881 4882 /* send and recv i-structure */ 4883 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4884 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4885 4886 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4887 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4888 for (proc = 0, k = 0; proc < size; proc++) { 4889 if (!len_s[proc]) continue; 4890 /* form outgoing message for i-structure: 4891 buf_si[0]: nrows to be sent 4892 [1:nrows]: row index (global) 4893 [nrows+1:2*nrows+1]: i-structure index 4894 */ 4895 nrows = len_si[proc] / 2 - 1; 4896 buf_si_i = buf_si + nrows + 1; 4897 buf_si[0] = nrows; 4898 buf_si_i[0] = 0; 4899 nrows = 0; 4900 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4901 anzi = ai[i + 1] - ai[i]; 4902 if (anzi) { 4903 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4904 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4905 nrows++; 4906 } 4907 } 4908 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4909 k++; 4910 buf_si += len_si[proc]; 4911 } 4912 4913 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4914 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4915 4916 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4917 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4918 4919 PetscCall(PetscFree(len_si)); 4920 PetscCall(PetscFree(len_ri)); 4921 PetscCall(PetscFree(rj_waits)); 4922 PetscCall(PetscFree2(si_waits, sj_waits)); 4923 PetscCall(PetscFree(ri_waits)); 4924 PetscCall(PetscFree(buf_s)); 4925 PetscCall(PetscFree(status)); 4926 4927 /* compute a local seq matrix in each processor */ 4928 /* allocate bi array and free space 
for accumulating nonzero column info */ 4929 PetscCall(PetscMalloc1(m + 1, &bi)); 4930 bi[0] = 0; 4931 4932 /* create and initialize a linked list */ 4933 nlnk = N + 1; 4934 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4935 4936 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4937 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4938 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4939 4940 current_space = free_space; 4941 4942 /* determine symbolic info for each local row */ 4943 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4944 4945 for (k = 0; k < merge->nrecv; k++) { 4946 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4947 nrows = *buf_ri_k[k]; 4948 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4949 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4950 } 4951 4952 MatPreallocateBegin(comm, m, n, dnz, onz); 4953 len = 0; 4954 for (i = 0; i < m; i++) { 4955 bnzi = 0; 4956 /* add local non-zero cols of this proc's seqmat into lnk */ 4957 arow = owners[rank] + i; 4958 anzi = ai[arow + 1] - ai[arow]; 4959 aj = a->j + ai[arow]; 4960 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4961 bnzi += nlnk; 4962 /* add received col data into lnk */ 4963 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4964 if (i == *nextrow[k]) { /* i-th row */ 4965 anzi = *(nextai[k] + 1) - *nextai[k]; 4966 aj = buf_rj[k] + *nextai[k]; 4967 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4968 bnzi += nlnk; 4969 nextrow[k]++; 4970 nextai[k]++; 4971 } 4972 } 4973 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4974 4975 /* if free space is not available, make more free space */ 4976 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4977 /* copy data into free 
space, then initialize lnk */ 4978 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4979 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4980 4981 current_space->array += bnzi; 4982 current_space->local_used += bnzi; 4983 current_space->local_remaining -= bnzi; 4984 4985 bi[i + 1] = bi[i] + bnzi; 4986 } 4987 4988 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4989 4990 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4991 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4992 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4993 4994 /* create symbolic parallel matrix B_mpi */ 4995 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4996 PetscCall(MatCreate(comm, &B_mpi)); 4997 if (n == PETSC_DECIDE) { 4998 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4999 } else { 5000 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5001 } 5002 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5003 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5004 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5005 MatPreallocateEnd(dnz, onz); 5006 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5007 5008 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5009 B_mpi->assembled = PETSC_FALSE; 5010 merge->bi = bi; 5011 merge->bj = bj; 5012 merge->buf_ri = buf_ri; 5013 merge->buf_rj = buf_rj; 5014 merge->coi = NULL; 5015 merge->coj = NULL; 5016 merge->owners_co = NULL; 5017 5018 PetscCall(PetscCommDestroy(&comm)); 5019 5020 /* attach the supporting struct to B_mpi for reuse */ 5021 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5022 PetscCall(PetscContainerSetPointer(container, merge)); 5023 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5024 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5025 PetscCall(PetscContainerDestroy(&container)); 5026 
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
  matrices from each processor

  Collective

  Input Parameters:
+ comm - the communicators the parallel matrix will live on
. seqmat - the input sequential matrices
. m - number of local rows (or `PETSC_DECIDE`)
. n - number of local columns (or `PETSC_DECIDE`)
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. mpimat - the parallel matrix generated

  Level: advanced

  Note:
  The dimensions of the sequential matrix in each processor MUST be the same.
  The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.

.seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) {
    /* uniprocessor: the "sum" degenerates to a copy of the single sequential matrix */
    PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
    } else {
      /* reuse path assumes *mpimat already has the same nonzero pattern as seqmat */
      PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* parallel: symbolic phase only on first use, numeric phase on every call */
  PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
  if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.

  Destroy the matrix with `MatDestroy()`

.seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    /* sequential input: no data movement, just return A with an extra reference */
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.

  Not Collective

  Input Parameters:
+ A - the matrix
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
.
A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;                       /* aa/ba rove; aav/bav keep the originals for the restore calls */
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* PetscStrbeginswith accepts derived types whose name starts with "mpiaij" as well */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block IS the whole matrix (see Notes above) */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds the diag-block and off-diag-block nonzeros of row i, merged in
       ascending global column order: off-diag cols < cstart, then diag cols, then off-diag cols >= cend */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: columns to the left of the diagonal block */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining columns, to the right of the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero structure is reused as-is; only values are copied, in the same interleaved order
       as the MAT_INITIAL_MATRIX path (ci/cj are fetched but not otherwise needed here) */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  /* restore with the saved base pointers, not the advanced roving copies */
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ`
matrix by taking all its local rows and putting them into a sequential matrix with 5256 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5257 5258 Not Collective 5259 5260 Input Parameters: 5261 + A - the matrix 5262 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5263 5264 Output Parameters: 5265 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5266 - A_loc - the local sequential matrix generated 5267 5268 Level: developer 5269 5270 Note: 5271 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5272 part, then those associated with the off-diagonal part (in its local ordering) 5273 5274 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5275 @*/ 5276 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5277 { 5278 Mat Ao, Ad; 5279 const PetscInt *cmap; 5280 PetscMPIInt size; 5281 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5282 5283 PetscFunctionBegin; 5284 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5285 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5286 if (size == 1) { 5287 if (scall == MAT_INITIAL_MATRIX) { 5288 PetscCall(PetscObjectReference((PetscObject)Ad)); 5289 *A_loc = Ad; 5290 } else if (scall == MAT_REUSE_MATRIX) { 5291 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5292 } 5293 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5294 PetscFunctionReturn(PETSC_SUCCESS); 5295 } 5296 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5297 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5298 if (f) { 5299 PetscCall((*f)(A, scall, glob, A_loc)); 5300 } else { 5301 
Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5302 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5303 Mat_SeqAIJ *c; 5304 PetscInt *ai = a->i, *aj = a->j; 5305 PetscInt *bi = b->i, *bj = b->j; 5306 PetscInt *ci, *cj; 5307 const PetscScalar *aa, *ba; 5308 PetscScalar *ca; 5309 PetscInt i, j, am, dn, on; 5310 5311 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5312 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5313 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5314 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5315 if (scall == MAT_INITIAL_MATRIX) { 5316 PetscInt k; 5317 PetscCall(PetscMalloc1(1 + am, &ci)); 5318 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5319 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5320 ci[0] = 0; 5321 for (i = 0, k = 0; i < am; i++) { 5322 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5323 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5324 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5325 /* diagonal portion of A */ 5326 for (j = 0; j < ncols_d; j++, k++) { 5327 cj[k] = *aj++; 5328 ca[k] = *aa++; 5329 } 5330 /* off-diagonal portion of A */ 5331 for (j = 0; j < ncols_o; j++, k++) { 5332 cj[k] = dn + *bj++; 5333 ca[k] = *ba++; 5334 } 5335 } 5336 /* put together the new matrix */ 5337 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5338 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5339 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5340 c = (Mat_SeqAIJ *)(*A_loc)->data; 5341 c->free_a = PETSC_TRUE; 5342 c->free_ij = PETSC_TRUE; 5343 c->nonew = 0; 5344 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5345 } else if (scall == MAT_REUSE_MATRIX) { 5346 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5347 for (i = 0; i < am; i++) { 5348 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5349 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5350 /* diagonal portion of A */ 5351 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5352 /* off-diagonal portion of A */ 5353 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5354 } 5355 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5356 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5357 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5358 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5359 if (glob) { 5360 PetscInt cst, *gidx; 5361 5362 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5363 PetscCall(PetscMalloc1(dn + on, &gidx)); 5364 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5365 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5366 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5367 } 5368 } 5369 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5370 PetscFunctionReturn(PETSC_SUCCESS); 5371 } 5372 5373 /*@C 5374 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5375 5376 Not Collective 5377 5378 Input Parameters: 5379 + A - the matrix 5380 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5381 . row - index set of rows to extract (or `NULL`) 5382 - col - index set of columns to extract (or `NULL`) 5383 5384 Output Parameter: 5385 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: columns with nonzeros, in ascending global order:
       off-diag columns < cstart, then the owned (diagonal) columns, then off-diag columns >= cend */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* idx ownership transfers to the IS */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ             *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ             *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt                plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt             owner;
  PetscSFNode            *iremote, *oiremote;
  const PetscInt         *lrowindices;
  PetscSF                 sf, osf;
  PetscInt                pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt                ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm                comm;
  ISLocalToGlobalMapping  mapping;
  const PetscScalar      *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf; broadcast (count,offset) pairs as MPIU_2INT units */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (temporarily mutates pd->j; undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place local->global conversion of po->j */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash-map keys come out unordered; sort them to form the row IS */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5708 - B_seq - the sequential matrix generated 5709 5710 Level: developer 5711 5712 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5713 @*/ 5714 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5715 { 5716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5717 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5718 IS isrowb, iscolb; 5719 Mat *bseq = NULL; 5720 5721 PetscFunctionBegin; 5722 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5723 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5724 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5725 5726 if (scall == MAT_INITIAL_MATRIX) { 5727 start = A->cmap->rstart; 5728 cmap = a->garray; 5729 nzA = a->A->cmap->n; 5730 nzB = a->B->cmap->n; 5731 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5732 ncols = 0; 5733 for (i = 0; i < nzB; i++) { /* row < local row index */ 5734 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5735 else break; 5736 } 5737 imark = i; 5738 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5739 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5740 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5741 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5742 } else { 5743 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5744 isrowb = *rowb; 5745 iscolb = *colb; 5746 PetscCall(PetscMalloc1(1, &bseq)); 5747 bseq[0] = *B_seq; 5748 } 5749 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5750 *B_seq = bseq[0]; 5751 PetscCall(PetscFree(bseq)); 5752 if (!rowb) { 5753 
PetscCall(ISDestroy(&isrowb)); 5754 } else { 5755 *rowb = isrowb; 5756 } 5757 if (!colb) { 5758 PetscCall(ISDestroy(&iscolb)); 5759 } else { 5760 *colb = iscolb; 5761 } 5762 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5763 PetscFunctionReturn(PETSC_SUCCESS); 5764 } 5765 5766 /* 5767 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5768 of the OFF-DIAGONAL portion of local A 5769 5770 Collective 5771 5772 Input Parameters: 5773 + A,B - the matrices in `MATMPIAIJ` format 5774 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5775 5776 Output Parameter: 5777 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5778 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5779 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5780 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5781 5782 Developer Note: 5783 This directly accesses information inside the VecScatter associated with the matrix-vector product 5784 for this matrix. This is not desirable.. 
5785 5786 Level: developer 5787 5788 */ 5789 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5790 { 5791 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5792 Mat_SeqAIJ *b_oth; 5793 VecScatter ctx; 5794 MPI_Comm comm; 5795 const PetscMPIInt *rprocs, *sprocs; 5796 const PetscInt *srow, *rstarts, *sstarts; 5797 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5798 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5799 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5800 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5801 PetscMPIInt size, tag, rank, nreqs; 5802 5803 PetscFunctionBegin; 5804 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5805 PetscCallMPI(MPI_Comm_size(comm, &size)); 5806 5807 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5808 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5809 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5810 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5811 5812 if (size == 1) { 5813 startsj_s = NULL; 5814 bufa_ptr = NULL; 5815 *B_oth = NULL; 5816 PetscFunctionReturn(PETSC_SUCCESS); 5817 } 5818 5819 ctx = a->Mvctx; 5820 tag = ((PetscObject)ctx)->tag; 5821 5822 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5823 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5824 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5825 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5826 PetscCall(PetscMalloc1(nreqs, &reqs)); 5827 rwaits = reqs; 5828 swaits = reqs + nrecvs; 5829 5830 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5831 if (scall == MAT_INITIAL_MATRIX) { 5832 /* i-array */ 5833 /* post receives */ 5834 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5835 for (i = 0; i < nrecvs; i++) { 5836 rowlen = rvalues + rstarts[i] * rbs; 5837 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5838 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5839 } 5840 5841 /* pack the outgoing message */ 5842 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5843 5844 sstartsj[0] = 0; 5845 rstartsj[0] = 0; 5846 len = 0; /* total length of j or a array to be sent */ 5847 if (nsends) { 5848 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5849 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5850 } 5851 for (i = 0; i < nsends; i++) { 5852 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5853 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5854 for (j = 0; j < nrows; j++) { 5855 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5856 for (l = 0; l < sbs; l++) { 5857 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5858 5859 rowlen[j * sbs + l] = ncols; 5860 5861 len += ncols; 5862 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5863 } 5864 k++; 5865 } 5866 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5867 5868 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5869 } 5870 /* recvs and sends of i-array are completed */ 5871 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5872 PetscCall(PetscFree(svalues)); 
5873 5874 /* allocate buffers for sending j and a arrays */ 5875 PetscCall(PetscMalloc1(len + 1, &bufj)); 5876 PetscCall(PetscMalloc1(len + 1, &bufa)); 5877 5878 /* create i-array of B_oth */ 5879 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5880 5881 b_othi[0] = 0; 5882 len = 0; /* total length of j or a array to be received */ 5883 k = 0; 5884 for (i = 0; i < nrecvs; i++) { 5885 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5886 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5887 for (j = 0; j < nrows; j++) { 5888 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5889 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5890 k++; 5891 } 5892 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5893 } 5894 PetscCall(PetscFree(rvalues)); 5895 5896 /* allocate space for j and a arrays of B_oth */ 5897 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5898 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5899 5900 /* j-array */ 5901 /* post receives of j-array */ 5902 for (i = 0; i < nrecvs; i++) { 5903 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5904 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5905 } 5906 5907 /* pack the outgoing message j-array */ 5908 if (nsends) k = sstarts[0]; 5909 for (i = 0; i < nsends; i++) { 5910 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5911 bufJ = bufj + sstartsj[i]; 5912 for (j = 0; j < nrows; j++) { 5913 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5914 for (ll = 0; ll < sbs; ll++) { 5915 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5916 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5917 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5918 } 5919 } 5920 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5921 } 5922 5923 /* recvs and sends of j-array are 
completed */ 5924 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5925 } else if (scall == MAT_REUSE_MATRIX) { 5926 sstartsj = *startsj_s; 5927 rstartsj = *startsj_r; 5928 bufa = *bufa_ptr; 5929 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5930 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5931 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5932 5933 /* a-array */ 5934 /* post receives of a-array */ 5935 for (i = 0; i < nrecvs; i++) { 5936 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5937 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5938 } 5939 5940 /* pack the outgoing message a-array */ 5941 if (nsends) k = sstarts[0]; 5942 for (i = 0; i < nsends; i++) { 5943 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5944 bufA = bufa + sstartsj[i]; 5945 for (j = 0; j < nrows; j++) { 5946 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5947 for (ll = 0; ll < sbs; ll++) { 5948 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5949 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5950 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5951 } 5952 } 5953 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5954 } 5955 /* recvs and sends of a-array are completed */ 5956 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5957 PetscCall(PetscFree(reqs)); 5958 5959 if (scall == MAT_INITIAL_MATRIX) { 5960 /* put together the new matrix */ 5961 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5962 5963 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5964 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5965 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5966 b_oth->free_a = PETSC_TRUE; 5967 b_oth->free_ij = PETSC_TRUE; 5968 b_oth->nonew = 0; 5969 5970 PetscCall(PetscFree(bufj)); 5971 if (!startsj_s || !bufa_ptr) { 5972 PetscCall(PetscFree2(sstartsj, rstartsj)); 5973 PetscCall(PetscFree(bufa_ptr)); 5974 } else { 5975 *startsj_s = sstartsj; 5976 *startsj_r = rstartsj; 5977 *bufa_ptr = bufa; 5978 } 5979 } else if (scall == MAT_REUSE_MATRIX) { 5980 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5981 } 5982 5983 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5984 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5985 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5986 PetscFunctionReturn(PETSC_SUCCESS); 5987 } 5988 5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5992 #if defined(PETSC_HAVE_MKL_SPARSE) 5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5994 #endif 5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5997 #if defined(PETSC_HAVE_ELEMENTAL) 5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5999 #endif 6000 #if defined(PETSC_HAVE_SCALAPACK) 6001 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6002 #endif 6003 #if defined(PETSC_HAVE_HYPRE) 6004 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6005 #endif 6006 #if defined(PETSC_HAVE_CUDA) 6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
   Computes (B'*A')' since computing B*A directly is untenable

               n                p                p
          [         ]     [         ]      [         ]
        m [    A    ]  *  [    B    ]  = m [    C    ]
          [         ]   n [         ]      [         ]
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form the transposes, multiply in the transposed order, then transpose back into C */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Mark C as the destination of the MAT_REUSE_MATRIX transpose of Ct */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: size/type C and install the numeric kernel above */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already some dense flavor; otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Wire up the AB product path for C = A*B with A MPIDense, B MPIAIJ */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch on product type; only MATPRODUCT_AB is supported for this pairing */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way sorted merge; jmap deltas skip over repeats of the current unique entry */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: t unique nonzeros emitted so far */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): the upper bound j[p] <= mat->cmap->N admits j[p] == N, but valid
         global columns are [0, N); confirm whether this should be a strict < */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters restart from zero for the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz; /* p loops over jmap_new[] backwards */
  /* Backward sweep relies on PetscCount being signed: k reaches -1 and p reaches -1 at loop exit */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
PetscFunctionReturn(PETSC_SUCCESS); 6342 } 6343 6344 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6345 { 6346 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6347 6348 PetscFunctionBegin; 6349 PetscCall(PetscSFDestroy(&coo->sf)); 6350 PetscCall(PetscFree(coo->Aperm1)); 6351 PetscCall(PetscFree(coo->Bperm1)); 6352 PetscCall(PetscFree(coo->Ajmap1)); 6353 PetscCall(PetscFree(coo->Bjmap1)); 6354 PetscCall(PetscFree(coo->Aimap2)); 6355 PetscCall(PetscFree(coo->Bimap2)); 6356 PetscCall(PetscFree(coo->Aperm2)); 6357 PetscCall(PetscFree(coo->Bperm2)); 6358 PetscCall(PetscFree(coo->Ajmap2)); 6359 PetscCall(PetscFree(coo->Bjmap2)); 6360 PetscCall(PetscFree(coo->Cperm1)); 6361 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6362 PetscCall(PetscFree(coo)); 6363 PetscFunctionReturn(PETSC_SUCCESS); 6364 } 6365 6366 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6367 { 6368 MPI_Comm comm; 6369 PetscMPIInt rank, size; 6370 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6371 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6372 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6373 PetscContainer container; 6374 MatCOOStruct_MPIAIJ *coo; 6375 6376 PetscFunctionBegin; 6377 PetscCall(PetscFree(mpiaij->garray)); 6378 PetscCall(VecDestroy(&mpiaij->lvec)); 6379 #if defined(PETSC_USE_CTABLE) 6380 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6381 #else 6382 PetscCall(PetscFree(mpiaij->colmap)); 6383 #endif 6384 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6385 mat->assembled = PETSC_FALSE; 6386 mat->was_assembled = PETSC_FALSE; 6387 6388 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6389 PetscCallMPI(MPI_Comm_size(comm, &size)); 6390 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6391 PetscCall(PetscLayoutSetUp(mat->rmap)); 6392 PetscCall(PetscLayoutSetUp(mat->cmap)); 6393 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, 
&rend)); 6394 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6395 PetscCall(MatGetLocalSize(mat, &m, &n)); 6396 PetscCall(MatGetSize(mat, &M, &N)); 6397 6398 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6399 /* entries come first, then local rows, then remote rows. */ 6400 PetscCount n1 = coo_n, *perm1; 6401 PetscInt *i1 = coo_i, *j1 = coo_j; 6402 6403 PetscCall(PetscMalloc1(n1, &perm1)); 6404 for (k = 0; k < n1; k++) perm1[k] = k; 6405 6406 /* Manipulate indices so that entries with negative row or col indices will have smallest 6407 row indices, local entries will have greater but negative row indices, and remote entries 6408 will have positive row indices. 6409 */ 6410 for (k = 0; k < n1; k++) { 6411 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6412 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6413 else { 6414 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6415 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6416 } 6417 } 6418 6419 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6420 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6421 6422 /* Advance k to the first entry we need to take care of */ 6423 for (k = 0; k < n1; k++) 6424 if (i1[k] > PETSC_MIN_INT) break; 6425 PetscInt i1start = k; 6426 6427 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6428 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6429 6430 /* Send remote rows to their owner */ 6431 /* Find which rows should be sent to which remote ranks*/ 6432 PetscInt 
nsend = 0; /* Number of MPI ranks to send data to */ 6433 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6434 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6435 const PetscInt *ranges; 6436 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6437 6438 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6439 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6440 for (k = rem; k < n1;) { 6441 PetscMPIInt owner; 6442 PetscInt firstRow, lastRow; 6443 6444 /* Locate a row range */ 6445 firstRow = i1[k]; /* first row of this owner */ 6446 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6447 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6448 6449 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6450 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6451 6452 /* All entries in [k,p) belong to this remote owner */ 6453 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6454 PetscMPIInt *sendto2; 6455 PetscInt *nentries2; 6456 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6457 6458 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6459 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6460 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6461 PetscCall(PetscFree2(sendto, nentries2)); 6462 sendto = sendto2; 6463 nentries = nentries2; 6464 maxNsend = maxNsend2; 6465 } 6466 sendto[nsend] = owner; 6467 nentries[nsend] = p - k; 6468 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6469 nsend++; 6470 k = p; 6471 } 6472 6473 /* Build 1st SF to know offsets on remote to send data */ 6474 PetscSF sf1; 6475 PetscInt nroots = 1, nroots2 = 0; 6476 PetscInt nleaves = nsend, nleaves2 = 0; 6477 PetscInt *offsets; 6478 PetscSFNode *iremote; 6479 6480 PetscCall(PetscSFCreate(comm, &sf1)); 6481 PetscCall(PetscMalloc1(nsend, &iremote)); 6482 PetscCall(PetscMalloc1(nsend, &offsets)); 6483 for (k = 0; k < nsend; k++) { 6484 iremote[k].rank = sendto[k]; 6485 iremote[k].index = 0; 6486 nleaves2 += nentries[k]; 6487 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6488 } 6489 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6490 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6491 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6492 PetscCall(PetscSFDestroy(&sf1)); 6493 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6494 6495 /* Build 2nd SF to send remote COOs to their owner */ 6496 PetscSF sf2; 6497 nroots = nroots2; 6498 nleaves = nleaves2; 6499 PetscCall(PetscSFCreate(comm, &sf2)); 6500 
PetscCall(PetscSFSetFromOptions(sf2)); 6501 PetscCall(PetscMalloc1(nleaves, &iremote)); 6502 p = 0; 6503 for (k = 0; k < nsend; k++) { 6504 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6505 for (q = 0; q < nentries[k]; q++, p++) { 6506 iremote[p].rank = sendto[k]; 6507 iremote[p].index = offsets[k] + q; 6508 } 6509 } 6510 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6511 6512 /* Send the remote COOs to their owner */ 6513 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6514 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6515 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6516 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6517 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6518 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6519 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6520 6521 PetscCall(PetscFree(offsets)); 6522 PetscCall(PetscFree2(sendto, nentries)); 6523 6524 /* Sort received COOs by row along with the permutation array */ 6525 for (k = 0; k < n2; k++) perm2[k] = k; 6526 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6527 6528 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6529 PetscCount *Cperm1; 6530 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6531 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6532 6533 /* Support for HYPRE matrices, kind of a hack. 
6534 Swap min column with diagonal so that diagonal values will go first */ 6535 PetscBool hypre; 6536 const char *name; 6537 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6538 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6539 if (hypre) { 6540 PetscInt *minj; 6541 PetscBT hasdiag; 6542 6543 PetscCall(PetscBTCreate(m, &hasdiag)); 6544 PetscCall(PetscMalloc1(m, &minj)); 6545 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6546 for (k = i1start; k < rem; k++) { 6547 if (j1[k] < cstart || j1[k] >= cend) continue; 6548 const PetscInt rindex = i1[k] - rstart; 6549 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6550 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6551 } 6552 for (k = 0; k < n2; k++) { 6553 if (j2[k] < cstart || j2[k] >= cend) continue; 6554 const PetscInt rindex = i2[k] - rstart; 6555 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6556 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6557 } 6558 for (k = i1start; k < rem; k++) { 6559 const PetscInt rindex = i1[k] - rstart; 6560 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6561 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6562 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6563 } 6564 for (k = 0; k < n2; k++) { 6565 const PetscInt rindex = i2[k] - rstart; 6566 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6567 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6568 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6569 } 6570 PetscCall(PetscBTDestroy(&hasdiag)); 6571 PetscCall(PetscFree(minj)); 6572 } 6573 6574 /* Split local COOs and received COOs into diag/offdiag portions */ 6575 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6576 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6577 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6578 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6579 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6580 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6581 6582 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6583 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6584 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6585 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6586 6587 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6588 PetscInt *Ai, *Bi; 6589 PetscInt *Aj, *Bj; 6590 6591 PetscCall(PetscMalloc1(m + 1, &Ai)); 6592 PetscCall(PetscMalloc1(m + 1, &Bi)); 6593 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6594 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6595 6596 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6597 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6598 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6599 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6600 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6601 6602 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6603 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6604 6605 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6606 /* expect nonzeros in A/B most likely have local contributing entries */ 6607 PetscInt Annz = Ai[m]; 6608 PetscInt Bnnz = Bi[m]; 6609 PetscCount *Ajmap1_new, *Bjmap1_new; 6610 6611 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6612 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6613 6614 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6615 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6616 
6617 PetscCall(PetscFree(Aimap1)); 6618 PetscCall(PetscFree(Ajmap1)); 6619 PetscCall(PetscFree(Bimap1)); 6620 PetscCall(PetscFree(Bjmap1)); 6621 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6622 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6623 PetscCall(PetscFree(perm1)); 6624 PetscCall(PetscFree3(i2, j2, perm2)); 6625 6626 Ajmap1 = Ajmap1_new; 6627 Bjmap1 = Bjmap1_new; 6628 6629 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6630 if (Annz < Annz1 + Annz2) { 6631 PetscInt *Aj_new; 6632 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6633 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6634 PetscCall(PetscFree(Aj)); 6635 Aj = Aj_new; 6636 } 6637 6638 if (Bnnz < Bnnz1 + Bnnz2) { 6639 PetscInt *Bj_new; 6640 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6641 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6642 PetscCall(PetscFree(Bj)); 6643 Bj = Bj_new; 6644 } 6645 6646 /* Create new submatrices for on-process and off-process coupling */ 6647 PetscScalar *Aa, *Ba; 6648 MatType rtype; 6649 Mat_SeqAIJ *a, *b; 6650 PetscObjectState state; 6651 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6652 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6653 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6654 if (cstart) { 6655 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6656 } 6657 PetscCall(MatDestroy(&mpiaij->A)); 6658 PetscCall(MatDestroy(&mpiaij->B)); 6659 PetscCall(MatGetRootType_Private(mat, &rtype)); 6660 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6661 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6662 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6663 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6664 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6665 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, 
MPI_SUM, PetscObjectComm((PetscObject)mat))); 6666 6667 a = (Mat_SeqAIJ *)mpiaij->A->data; 6668 b = (Mat_SeqAIJ *)mpiaij->B->data; 6669 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6670 a->free_a = b->free_a = PETSC_TRUE; 6671 a->free_ij = b->free_ij = PETSC_TRUE; 6672 6673 /* conversion must happen AFTER multiply setup */ 6674 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6675 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6676 PetscCall(VecDestroy(&mpiaij->lvec)); 6677 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6678 6679 // Put the COO struct in a container and then attach that to the matrix 6680 PetscCall(PetscMalloc1(1, &coo)); 6681 coo->n = coo_n; 6682 coo->sf = sf2; 6683 coo->sendlen = nleaves; 6684 coo->recvlen = nroots; 6685 coo->Annz = Annz; 6686 coo->Bnnz = Bnnz; 6687 coo->Annz2 = Annz2; 6688 coo->Bnnz2 = Bnnz2; 6689 coo->Atot1 = Atot1; 6690 coo->Atot2 = Atot2; 6691 coo->Btot1 = Btot1; 6692 coo->Btot2 = Btot2; 6693 coo->Ajmap1 = Ajmap1; 6694 coo->Aperm1 = Aperm1; 6695 coo->Bjmap1 = Bjmap1; 6696 coo->Bperm1 = Bperm1; 6697 coo->Aimap2 = Aimap2; 6698 coo->Ajmap2 = Ajmap2; 6699 coo->Aperm2 = Aperm2; 6700 coo->Bimap2 = Bimap2; 6701 coo->Bjmap2 = Bjmap2; 6702 coo->Bperm2 = Bperm2; 6703 coo->Cperm1 = Cperm1; 6704 // Allocate in preallocation. 
// If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add the COO values v[] into the matrix, using the maps built by MatSetPreallocationCOO_MPIAIJ()
   and stashed on the matrix in the "__PETSc_MatCOOStruct_Host" container.
   Off-process entries are packed via Cperm1, sent to their owners with coo->sf, and the
   communication is overlapped with the accumulation of the locally owned entries. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B; /* diagonal and off-diagonal blocks */
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Retrieve the COO maps created by the preallocation phase; error out if missing */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    /* INSERT_VALUES discards the old value; ADD_VALUES accumulates onto it */
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; the local pass above already initialized the targets */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the method table,
   creates the stash used to buffer off-process entries, and registers all the
   type-specific methods (preallocation, conversions, products, COO assembly). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct copy of the full operations table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific methods; the corresponding MatDestroy_MPIAIJ() clears them */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
.
n    - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
. M  - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N  - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i  - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j  - column indices, which must be local, i.e., based off the start column of the diagonal portion
. a  - matrix values
. oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
. oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
- oa - matrix values

  Output Parameter:
. mat - the matrix

  Level: advanced

  Notes:
  The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
  must free the arrays once the matrix has been destroyed and not before.

  The `i` and `j` indices are 0 based

  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

  This sets local rows and cannot be used to set off-processor values.

  Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
  legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
  not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
  keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Validate the user-provided CSR arrays before wrapping them */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays as the diagonal (A) and off-diagonal (B) sequential blocks; no copies are made */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction, so assembly needs no communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Work data for the backend (device-capable) MatProduct implementation */
typedef struct {
  Mat *mp; /* intermediate products */
  PetscBool
*mptmp;       /* is the intermediate product temporary ? */
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND product data: releases the intermediate
   products, COO buffers/index arrays, scatter data, and the SF used for insertion. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with PetscSFMalloc on mmdata->mtype, so free through the SF */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single backing allocations for all per-product index lists.
     NOTE(review): own and off are dereferenced unconditionally here — assumes they are
     always allocated before this destructor can run; confirm against the symbolic phase */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* gather vv[idx[0..n)] into v[] */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: recompute the intermediate products,
   gather their values into the COO buffers (on-process into coo_v, off-process into
   coo_w, sent over via the SF), then insert everything into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* the symbolic-phase values may only be reused on the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track the fill offsets into coo_v (on-process) and coo_w (off-process) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later products, not C */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* all entries of this product are on-process: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7098 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7099 /* a base offset; type-2: sparse with a local to global map table */ 7100 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7101 7102 MatProductType ptype; 7103 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7104 PetscMPIInt size; 7105 7106 PetscFunctionBegin; 7107 MatCheckProduct(C, 1); 7108 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7109 ptype = product->type; 7110 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7111 ptype = MATPRODUCT_AB; 7112 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7113 } 7114 switch (ptype) { 7115 case MATPRODUCT_AB: 7116 A = product->A; 7117 P = product->B; 7118 m = A->rmap->n; 7119 n = P->cmap->n; 7120 M = A->rmap->N; 7121 N = P->cmap->N; 7122 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7123 break; 7124 case MATPRODUCT_AtB: 7125 P = product->A; 7126 A = product->B; 7127 m = P->cmap->n; 7128 n = A->cmap->n; 7129 M = P->cmap->N; 7130 N = A->cmap->N; 7131 hasoffproc = PETSC_TRUE; 7132 break; 7133 case MATPRODUCT_PtAP: 7134 A = product->A; 7135 P = product->B; 7136 m = P->cmap->n; 7137 n = P->cmap->n; 7138 M = P->cmap->N; 7139 N = P->cmap->N; 7140 hasoffproc = PETSC_TRUE; 7141 break; 7142 default: 7143 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7144 } 7145 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7146 if (size == 1) hasoffproc = PETSC_FALSE; 7147 7148 /* defaults */ 7149 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7150 mp[i] = NULL; 7151 mptmp[i] = PETSC_FALSE; 7152 rmapt[i] = -1; 7153 cmapt[i] = -1; 7154 rmapa[i] = NULL; 7155 cmapa[i] = NULL; 7156 } 7157 7158 /* customization */ 
7159 PetscCall(PetscNew(&mmdata)); 7160 mmdata->reusesym = product->api_user; 7161 if (ptype == MATPRODUCT_AB) { 7162 if (product->api_user) { 7163 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7164 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7165 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7166 PetscOptionsEnd(); 7167 } else { 7168 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7169 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7170 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7171 PetscOptionsEnd(); 7172 } 7173 } else if (ptype == MATPRODUCT_PtAP) { 7174 if (product->api_user) { 7175 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7176 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7177 PetscOptionsEnd(); 7178 } else { 7179 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7180 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7181 PetscOptionsEnd(); 7182 } 7183 } 7184 a = (Mat_MPIAIJ *)A->data; 7185 p = (Mat_MPIAIJ *)P->data; 7186 PetscCall(MatSetSizes(C, m, n, M, N)); 7187 PetscCall(PetscLayoutSetUp(C->rmap)); 7188 PetscCall(PetscLayoutSetUp(C->cmap)); 7189 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7190 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7191 7192 cp = 0; 7193 switch (ptype) { 7194 case MATPRODUCT_AB: /* A * P */ 7195 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7196 7197 /* A_diag * P_local (merged or not) */ 7198 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7199 /* P is product->B */ 7200 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7201 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7202 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7203 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7204 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7205 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7206 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7207 mp[cp]->product->api_user = product->api_user; 7208 PetscCall(MatProductSetFromOptions(mp[cp])); 7209 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7210 PetscCall(ISGetIndices(glob, &globidx)); 7211 rmapt[cp] = 1; 7212 cmapt[cp] = 2; 7213 cmapa[cp] = globidx; 7214 mptmp[cp] = PETSC_FALSE; 7215 cp++; 7216 } else { /* A_diag * P_diag and A_diag * P_off */ 7217 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7218 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7219 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7220 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7221 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7222 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7223 mp[cp]->product->api_user = product->api_user; 7224 PetscCall(MatProductSetFromOptions(mp[cp])); 7225 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7226 rmapt[cp] = 1; 7227 cmapt[cp] = 1; 7228 mptmp[cp] = PETSC_FALSE; 7229 cp++; 7230 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7231 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7232 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7233 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7234 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7235 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7236 mp[cp]->product->api_user = product->api_user; 7237 PetscCall(MatProductSetFromOptions(mp[cp])); 7238 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7239 rmapt[cp] = 1; 7240 cmapt[cp] = 2; 7241 cmapa[cp] = p->garray; 7242 mptmp[cp] = PETSC_FALSE; 7243 cp++; 7244 } 7245 7246 /* A_off * P_other */ 7247 if (mmdata->P_oth) { 7248 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7249 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7250 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7251 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7252 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7253 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7254 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7255 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7256 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7257 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7258 mp[cp]->product->api_user = product->api_user; 7259 PetscCall(MatProductSetFromOptions(mp[cp])); 7260 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7261 rmapt[cp] = 1; 7262 cmapt[cp] = 2; 7263 cmapa[cp] = P_oth_idx; 7264 mptmp[cp] = PETSC_FALSE; 7265 cp++; 7266 } 7267 break; 7268 7269 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7270 /* A is product->B */ 7271 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7272 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7273 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7274 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7275 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7276 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7277 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7278 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7279 mp[cp]->product->api_user = product->api_user; 7280 PetscCall(MatProductSetFromOptions(mp[cp])); 7281 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7282 PetscCall(ISGetIndices(glob, &globidx)); 7283 rmapt[cp] = 2; 7284 rmapa[cp] = globidx; 7285 cmapt[cp] = 2; 7286 cmapa[cp] = globidx; 7287 mptmp[cp] = PETSC_FALSE; 7288 cp++; 7289 } else { 7290 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7291 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7292 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7293 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7294 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7295 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7296 mp[cp]->product->api_user = product->api_user; 7297 PetscCall(MatProductSetFromOptions(mp[cp])); 7298 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7299 PetscCall(ISGetIndices(glob, &globidx)); 7300 rmapt[cp] = 1; 7301 cmapt[cp] = 2; 7302 cmapa[cp] = globidx; 7303 mptmp[cp] = PETSC_FALSE; 7304 cp++; 7305 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7306 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7307 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7308 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7309 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7310 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7311 mp[cp]->product->api_user = product->api_user; 7312 PetscCall(MatProductSetFromOptions(mp[cp])); 7313 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7314 rmapt[cp] = 2; 7315 rmapa[cp] = p->garray; 
7316 cmapt[cp] = 2; 7317 cmapa[cp] = globidx; 7318 mptmp[cp] = PETSC_FALSE; 7319 cp++; 7320 } 7321 break; 7322 case MATPRODUCT_PtAP: 7323 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7324 /* P is product->B */ 7325 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7326 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7327 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7328 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7329 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7330 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7331 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7332 mp[cp]->product->api_user = product->api_user; 7333 PetscCall(MatProductSetFromOptions(mp[cp])); 7334 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7335 PetscCall(ISGetIndices(glob, &globidx)); 7336 rmapt[cp] = 2; 7337 rmapa[cp] = globidx; 7338 cmapt[cp] = 2; 7339 cmapa[cp] = globidx; 7340 mptmp[cp] = PETSC_FALSE; 7341 cp++; 7342 if (mmdata->P_oth) { 7343 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7344 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7345 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7346 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7347 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7348 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7349 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7350 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7351 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7352 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7353 mp[cp]->product->api_user = product->api_user; 7354 PetscCall(MatProductSetFromOptions(mp[cp])); 7355 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7356 
mptmp[cp] = PETSC_TRUE; 7357 cp++; 7358 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7359 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7360 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7361 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7362 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7363 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7364 mp[cp]->product->api_user = product->api_user; 7365 PetscCall(MatProductSetFromOptions(mp[cp])); 7366 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7367 rmapt[cp] = 2; 7368 rmapa[cp] = globidx; 7369 cmapt[cp] = 2; 7370 cmapa[cp] = P_oth_idx; 7371 mptmp[cp] = PETSC_FALSE; 7372 cp++; 7373 } 7374 break; 7375 default: 7376 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7377 } 7378 /* sanity check */ 7379 if (size > 1) 7380 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7381 7382 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7383 for (i = 0; i < cp; i++) { 7384 mmdata->mp[i] = mp[i]; 7385 mmdata->mptmp[i] = mptmp[i]; 7386 } 7387 mmdata->cp = cp; 7388 C->product->data = mmdata; 7389 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7390 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7391 7392 /* memory type */ 7393 mmdata->mtype = PETSC_MEMTYPE_HOST; 7394 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7395 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7396 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7397 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7398 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7399 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7400 
7401 /* prepare coo coordinates for values insertion */ 7402 7403 /* count total nonzeros of those intermediate seqaij Mats 7404 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7405 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7406 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7407 */ 7408 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7409 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7410 if (mptmp[cp]) continue; 7411 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7412 const PetscInt *rmap = rmapa[cp]; 7413 const PetscInt mr = mp[cp]->rmap->n; 7414 const PetscInt rs = C->rmap->rstart; 7415 const PetscInt re = C->rmap->rend; 7416 const PetscInt *ii = mm->i; 7417 for (i = 0; i < mr; i++) { 7418 const PetscInt gr = rmap[i]; 7419 const PetscInt nz = ii[i + 1] - ii[i]; 7420 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7421 else ncoo_oown += nz; /* this row is local */ 7422 } 7423 } else ncoo_d += mm->nz; 7424 } 7425 7426 /* 7427 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7428 7429 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7430 7431 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7432 7433 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7434 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7435 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7436 7437 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7438 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7439 */ 7440 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7441 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7442 7443 /* gather (i,j) of nonzeros inserted by remote procs */ 7444 if (hasoffproc) { 7445 PetscSF msf; 7446 PetscInt ncoo2, *coo_i2, *coo_j2; 7447 7448 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7449 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7450 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7451 7452 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7453 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7454 PetscInt *idxoff = mmdata->off[cp]; 7455 PetscInt *idxown = mmdata->own[cp]; 7456 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7457 const PetscInt *rmap = rmapa[cp]; 7458 const PetscInt *cmap = cmapa[cp]; 7459 const PetscInt *ii = mm->i; 7460 PetscInt *coi = coo_i + ncoo_o; 7461 PetscInt *coj = coo_j + ncoo_o; 7462 const PetscInt mr = mp[cp]->rmap->n; 7463 const PetscInt rs = C->rmap->rstart; 7464 const PetscInt re = C->rmap->rend; 7465 const PetscInt cs = C->cmap->rstart; 7466 for (i = 0; i < mr; i++) { 7467 const PetscInt *jj = mm->j + ii[i]; 7468 const PetscInt gr = rmap[i]; 7469 const PetscInt nz = ii[i + 1] - ii[i]; 7470 if (gr < rs || gr >= re) { /* this is an offproc row */ 7471 for (j = ii[i]; j < ii[i + 1]; j++) { 7472 *coi++ = gr; 7473 *idxoff++ = j; 7474 } 7475 if (!cmapt[cp]) { /* already global */ 7476 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7477 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7478 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7479 } else { /* offdiag */ 7480 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7481 } 7482 ncoo_o += nz; 7483 } else { /* this is a local row */ 7484 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7485 } 7486 } 7487 } 7488 mmdata->off[cp + 1] = idxoff; 7489 mmdata->own[cp + 1] = idxown; 7490 } 7491 7492 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7493 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7494 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7495 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7496 ncoo = ncoo_d + ncoo_oown + ncoo2; 7497 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7498 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7499 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7500 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7501 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7502 PetscCall(PetscFree2(coo_i, coo_j)); 7503 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7504 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7505 coo_i = coo_i2; 7506 coo_j = coo_j2; 7507 } else { /* no offproc values insertion */ 7508 ncoo = ncoo_d; 7509 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7510 7511 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7512 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7513 PetscCall(PetscSFSetUp(mmdata->sf)); 7514 } 7515 mmdata->hasoffproc = hasoffproc; 7516 7517 /* gather (i,j) of nonzeros inserted locally */ 7518 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7519 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7520 PetscInt *coi = coo_i + ncoo_d; 7521 PetscInt *coj = coo_j + ncoo_d; 7522 const PetscInt *jj = mm->j; 7523 const PetscInt *ii = mm->i; 7524 const PetscInt *cmap = 
cmapa[cp]; 7525 const PetscInt *rmap = rmapa[cp]; 7526 const PetscInt mr = mp[cp]->rmap->n; 7527 const PetscInt rs = C->rmap->rstart; 7528 const PetscInt re = C->rmap->rend; 7529 const PetscInt cs = C->cmap->rstart; 7530 7531 if (mptmp[cp]) continue; 7532 if (rmapt[cp] == 1) { /* consecutive rows */ 7533 /* fill coo_i */ 7534 for (i = 0; i < mr; i++) { 7535 const PetscInt gr = i + rs; 7536 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7537 } 7538 /* fill coo_j */ 7539 if (!cmapt[cp]) { /* type-0, already global */ 7540 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7541 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7542 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7543 } else { /* type-2, local to global for sparse columns */ 7544 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7545 } 7546 ncoo_d += mm->nz; 7547 } else if (rmapt[cp] == 2) { /* sparse rows */ 7548 for (i = 0; i < mr; i++) { 7549 const PetscInt *jj = mm->j + ii[i]; 7550 const PetscInt gr = rmap[i]; 7551 const PetscInt nz = ii[i + 1] - ii[i]; 7552 if (gr >= rs && gr < re) { /* local rows */ 7553 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7554 if (!cmapt[cp]) { /* type-0, already global */ 7555 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7556 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7557 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7558 } else { /* type-2, local to global for sparse columns */ 7559 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7560 } 7561 ncoo_d += nz; 7562 } 7563 } 7564 } 7565 } 7566 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7567 PetscCall(ISDestroy(&glob)); 7568 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7569 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7570 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7571 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* hand the assembled (i,j) COO pattern to C; values arrive later via the numeric phase */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatProductSetFromOptions_MPIAIJBACKEND - select the COO-based backend symbolic kernel
  for C = A*B, A^t*B or P^t*A*P, with an option to fall back to the plain MPIAIJ CPU path.

  Without PETSC_HAVE_DEVICE the backend is always eligible (match starts PETSC_TRUE).
  With device support, it is used only when A and B have the same dynamic type and
  neither operand is bound to the CPU; even then, a per-product option
  (-mat*_backend_cpu / -mat_product_algorithm_backend_cpu) lets the user opt out.
  If the backend is not selected, MatProductSetFromOptions_MPIAIJ() is called instead.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* becomes TRUE only if A and B types agree and are not CPU-bound */
  PetscBool usecpu = PETSC_FALSE; /* user-requested CPU fallback */
#else
  PetscBool match = PETSC_TRUE; /* no device build: backend always applicable */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user came through the old API (MatMatMult etc.)
       or the MatProduct API; both map to the same usecpu flag */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    /* backend only implements these three product types */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n  - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)

  NOTE(review): the dedup relies on cc[cnt] < idx[j]/bs being monotone, i.e. the column
  indices returned by MatGetRow() are assumed sorted ascending — TODO confirm for all callers.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j; /* cnt = -1 so an empty row yields *n == 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs; /* first block column */
    for (j = 1; j < nidx; j++) {
      /* append only when a new block column is entered */
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed  - the block indices (must be large enough to contain the indices)

  w0/w1/w2 are caller-provided workspaces; the buffers are rotated while merging, so on
  return *collapsed aliases one of them (do not free it separately).
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* seed with the first row of the block row, then merge in the remaining bs-1 rows */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap so the freshly merged array becomes the accumulator for the next iteration */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

 Input Parameter:
 . Amat - matrix
 - symmetrize - make the result symmetric
 + scale - scale with diagonal

 Output Parameter:
 .
a_Gmat - output scalar graph >= 0 7709 7710 */ 7711 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7712 { 7713 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7714 MPI_Comm comm; 7715 Mat Gmat; 7716 PetscBool ismpiaij, isseqaij; 7717 Mat a, b, c; 7718 MatType jtype; 7719 7720 PetscFunctionBegin; 7721 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7722 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7723 PetscCall(MatGetSize(Amat, &MM, &NN)); 7724 PetscCall(MatGetBlockSize(Amat, &bs)); 7725 nloc = (Iend - Istart) / bs; 7726 7727 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7728 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7729 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7730 7731 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7732 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7733 implementation */ 7734 if (bs > 1) { 7735 PetscCall(MatGetType(Amat, &jtype)); 7736 PetscCall(MatCreate(comm, &Gmat)); 7737 PetscCall(MatSetType(Gmat, jtype)); 7738 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7739 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7740 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7741 PetscInt *d_nnz, *o_nnz; 7742 MatScalar *aa, val, *AA; 7743 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7744 if (isseqaij) { 7745 a = Amat; 7746 b = NULL; 7747 } else { 7748 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7749 a = d->A; 7750 b = d->B; 7751 } 7752 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7753 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7754 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7755 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7756 const PetscInt *cols1, *cols2; 7757 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7758 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7759 nnz[brow / bs] = nc2 / bs; 7760 if (nc2 % bs) ok = 0; 7761 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7762 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7763 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7764 if (nc1 != nc2) ok = 0; 7765 else { 7766 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7767 if (cols1[jj] != cols2[jj]) ok = 0; 7768 if (cols1[jj] % bs != jj % bs) ok = 0; 7769 } 7770 } 7771 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7772 } 7773 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7774 if (!ok) { 7775 PetscCall(PetscFree2(d_nnz, o_nnz)); 7776 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7777 goto old_bs; 7778 } 7779 } 7780 } 7781 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7782 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7783 PetscCall(PetscFree2(d_nnz, o_nnz)); 7784 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7785 // diag 7786 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7787 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7788 ai = aseq->i; 7789 n = ai[brow + 1] - ai[brow]; 7790 aj = aseq->j + ai[brow]; 7791 for (int k = 0; k < n; k += bs) { // block columns 7792 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7793 val = 0; 7794 if (index_size == 0) { 7795 for (int ii = 0; ii < bs; ii++) { // rows in block 7796 aa = aseq->a + ai[brow + ii] + k; 7797 for (int jj = 0; jj < bs; jj++) { // columns in block 7798 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7799 } 7800 } 7801 } else { // use (index,index) value if provided 7802 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7803 int ii = index[iii]; 7804 aa = aseq->a + ai[brow + ii] + k; 7805 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7806 int jj = index[jjj]; 7807 val = PetscAbs(PetscRealPart(aa[jj])); 7808 } 7809 } 7810 } 7811 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7812 AA[k / bs] = val; 7813 } 7814 grow = Istart / bs + brow / bs; 7815 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7816 } 7817 // off-diag 7818 if (ismpiaij) { 7819 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7820 const PetscScalar *vals; 7821 const PetscInt *cols, *garray = aij->garray; 7822 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7823 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7824 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7825 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7826 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7827 AA[k / bs] = 0; 7828 AJ[cidx] = garray[cols[k]] / bs; 7829 } 7830 nc = ncols / bs; 7831 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7832 if (index_size == 0) { 7833 for (int ii = 0; ii < bs; ii++) { // rows in block 7834 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7835 for (int k = 0; k < ncols; k += bs) { 7836 for (int jj = 0; jj < bs; jj++) { // cols in block 7837 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7838 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7839 } 7840 } 7841 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7842 } 7843 } else { // use (index,index) value if provided 7844 for (int iii = 0; iii < index_size; iii++) { // rows in block 7845 int ii = index[iii]; 7846 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7847 for (int k = 0; k < ncols; k += bs) { 7848 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7849 int jj = index[jjj]; 7850 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7851 } 7852 } 7853 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7854 } 7855 } 7856 grow = Istart / bs + brow / bs; 7857 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7858 } 7859 } 7860 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7861 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7862 PetscCall(PetscFree2(AA, AJ)); 7863 } else { 7864 const PetscScalar *vals; 7865 const PetscInt *idx; 7866 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7867 old_bs: 7868 /* 7869 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7870 */ 7871 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7872 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7873 if (isseqaij) { 7874 PetscInt max_d_nnz; 7875 /* 7876 Determine exact preallocation count for (sequential) scalar matrix 7877 */ 7878 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7879 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7880 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7881 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7882 PetscCall(PetscFree3(w0, w1, w2)); 7883 } else if (ismpiaij) { 7884 Mat Daij, Oaij; 7885 const PetscInt *garray; 7886 PetscInt max_d_nnz; 7887 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7888 /* 7889 Determine exact preallocation count for diagonal block portion of scalar matrix 7890 */ 7891 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7892 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7893 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7894 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7895 PetscCall(PetscFree3(w0, w1, w2)); 7896 /* 7897 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7898 */ 7899 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7900 o_nnz[jj] = 0; 7901 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7902 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7903 o_nnz[jj] += ncols; 7904 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7905 } 7906 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7907 } 7908 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7909 /* get scalar copy (norms) of matrix */ 7910 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7911 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7912 PetscCall(PetscFree2(d_nnz, o_nnz)); 7913 for (Ii = Istart; Ii < Iend; Ii++) { 7914 PetscInt dest_row = Ii / bs; 7915 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7916 for (jj = 0; jj < ncols; jj++) { 7917 PetscInt dest_col = idx[jj] / bs; 7918 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7919 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7920 } 7921 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7922 } 7923 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7924 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7925 } 7926 } else { 7927 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7928 else { 7929 Gmat = Amat; 7930 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7931 } 7932 if (isseqaij) { 7933 a = Gmat; 7934 b = NULL; 7935 } else { 7936 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7937 a = d->A; 7938 b = d->B; 7939 } 7940 if (filter >= 0 || scale) { 7941 /* take absolute value of each entry */ 7942 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7943 MatInfo info; 7944 PetscScalar *avals; 7945 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7946 PetscCall(MatSeqAIJGetArray(c, &avals)); 7947 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); /* continuation: avals[jj] = |avals[jj]|, i.e. replace every stored entry by its absolute value */
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    /* Only pay for G + G^T when symmetry is not already known to hold */
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      /* If the pattern is known structurally symmetric the cheap SAME_NONZERO_PATTERN path can be used */
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* symmetrically scale Gmat by 1/sqrt(|diag|) so all diagonal values become 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    /* drop entries below the filter threshold (after optional scaling above) */
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr (the Fortran error argument) instead of returning a PetscErrorCode */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran compiler's name-mangling convention (all-caps, no underscore, or trailing underscore) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path of MatSetValues() specialized for MATMPIAIJ.

  All scalar arguments (mmat, mm, mn, maddv) arrive by reference, per Fortran calling
  convention.  Errors are reported through the trailing *_ierr argument via the
  PetscCall()/SETERRQ() redefinitions above, since this void function cannot return a
  PetscErrorCode.  Values are inserted directly into the diagonal (A) and off-diagonal
  (B) SeqAIJ blocks via the MatSetValues_SeqAIJ_A_Private()/_B_Private() macros
  (defined earlier in this file); the macros read the exact local variable names set
  up below (rp1/ap1/..., rp2/ap2/..., nonew, inserted, bm, am, N, t, _i, ii, lastcol1/2).
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* INSERT_VALUES and ADD_VALUES may not be mixed within one assembly cycle */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    /* zeros may only be dropped when adding, and never on the diagonal (see im[i] != in[j] test below) */
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the per-row cursors consumed by the A/B insertion macros */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major when roworiented, column-major otherwise */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are silently ignored */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to local B column */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              /* colmap stores local+1 so that the hash-map default 0 means "not present"; undo the +1 */
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not in B's current pattern and new nonzeros are allowed:
                   fall back to the "disassembled" (global-column) form of B */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private(),
                   since MatDisAssemble_MPIAIJ() replaced aij->B */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row]; /* NOTE(review): computed from the pre-disassembly ba while ba is reset to b->a below -- verify the _B_Private macro re-derives ap2, otherwise this points into the old array */
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B is indexed by global columns */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: buffer the values in the stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ