#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* Destroy an MPIAIJ matrix: free the "diagonal" (A) and "off-diagonal" (B) sequential blocks,
   the global-to-local column map, the scatter/work objects used by matrix-vector products, and
   detach every composed method so the base Mat can be re-typed or freed cleanly. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is a hash map when PETSC_USE_CTABLE is set, otherwise a plain array (see MatCreateColmap_MPIAIJ_Private()) */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* detach all composed methods registered by MatCreate_MPIAIJ() and friends */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared twice (also above) — harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/* Provide row-oriented IJ access for an MPIAIJ matrix by first merging the parallel matrix
   into a single local sequential matrix. The merged matrix is composed on A under the key
   "MatGetRowIJ_MPIAIJ" so the matching restore routine can retrieve it; the compose keeps a
   reference alive past the local MatDestroy() below. */
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRowIJ_MPIAIJ(): returns the IJ arrays of the stashed merged matrix and
   drops the composed reference, destroying the temporary sequential matrix. */
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Bind (or unbind) the matrix to CPU execution, propagating the flag to both sequential
   blocks and to the work vectors used in matrix-vector products. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* the flag is only recorded on the parent when a device backend is configured */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the row/column block sizes on the sequential blocks; the off-diagonal block B always
   gets column block size 1 because its (compressed) columns are not blocked. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS of the locally owned rows (global numbering) that contain at least one nonzero
   value, scanning both the diagonal and off-diagonal blocks. Returns *keptrows = NULL when no
   rank has an all-zero row, avoiding the IS allocation in the common case. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows that are structurally empty or hold only explicit zeros */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* total number of zero rows across all ranks; if none anywhere, every rank returns NULL */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the rows with at least one nonzero value */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set (or add to) the diagonal of Y from vector D. When the layouts are congruent and Y is
   assembled the diagonal lives entirely in the A block; otherwise fall back to the generic
   MatSetValues()-based implementation. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS (global numbering) of the locally owned rows whose diagonal entry is zero;
   the diagonal entries all live in the A block, so only it is searched. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  /* shift local row indices to global numbering before building the IS */
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a per-column reduction (norm, sum, or mean of real/imaginary parts) over the whole
   parallel matrix. Each rank accumulates its local contributions into a length-N work array
   (indexed globally via cmap->rstart for A and garray[] for B), then the arrays are combined
   with an Allreduce (MAX for the infinity norm, SUM otherwise). */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore with no use in between: presumably forces a host-side sync of the values
     before the raw a_aij->a / b_aij->a arrays are read below — TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS (global row numbering) of locally owned rows that have an entry outside the
   block diagonal: the union of A's off-block-diagonal rows and B's nonzero rows, sorted with
   duplicates removed. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both index lists, then sort and deduplicate in place */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  /* the per-block routines return local row indices; shift to global numbering */
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
  /* entries are stored shifted by +1 so that 0 can mean "not present" */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add a single value into the diagonal block A. Expects the caller (MatSetValues_MPIAIJ)
   to have set up rp1/ap1/nrow1/low1/high1/lastcol1/rmax1/nonew for the current row; does a
   binary-search-narrowed scan for the column and reallocates the row via
   MatSeqXAIJReallocateAIJ when a genuinely new nonzero must be inserted. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B, using the
   rp2/ap2/nrow2/low2/high2/lastcol2/rmax2 state variables. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row,
col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/* Overwrite the values of one locally owned global row, where v[] lists the row's values in
   global column order. The row is split into three runs copied separately: the part of B left
   of the diagonal block, the A (diagonal-block) part, and the part of B right of it. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetValues() for MPIAIJ: locally owned rows are inserted directly into the A/B blocks via
   the MatSetValues_SeqAIJ_{A,B}_Private() macros; off-process rows are queued in the stash for
   communication during assembly. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ
 *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the macro state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* A stores local column ids */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* B keeps global ids until assembly compacts them */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetValues() for MPIAIJ: only locally owned rows may be queried. Diagonal-block columns
   are read from A; other columns are mapped through colmap/garray to B's local numbering,
   returning 0.0 for positions with no stored entry. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt
 cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* diagonal block: A uses local column numbering */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate the global column via the (lazily built) colmap */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* no stored entry at this position -> value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin assembly: initiate the scatter of stashed off-process entries to their owning ranks.
   A no-op when stashing is disabled or no off-process entries were set. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: drain the stash of off-process entries into the local blocks, assemble A
   and B, agree across ranks on (dis)assembly of the off-diagonal block, set up the multiply
   machinery on first final assembly, and synchronize the global nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks until the scatter is exhausted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row workspace from MatGetRow() is invalid after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all stored values (structure is kept) in both sequential blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[],
PetscScalar diag, Vec x, Vec b) 846 { 847 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 848 PetscObjectState sA, sB; 849 PetscInt *lrows; 850 PetscInt r, len; 851 PetscBool cong, lch, gch; 852 853 PetscFunctionBegin; 854 /* get locally owned rows */ 855 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 856 PetscCall(MatHasCongruentLayouts(A, &cong)); 857 /* fix right hand side if needed */ 858 if (x && b) { 859 const PetscScalar *xx; 860 PetscScalar *bb; 861 862 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 863 PetscCall(VecGetArrayRead(x, &xx)); 864 PetscCall(VecGetArray(b, &bb)); 865 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 866 PetscCall(VecRestoreArrayRead(x, &xx)); 867 PetscCall(VecRestoreArray(b, &bb)); 868 } 869 870 sA = mat->A->nonzerostate; 871 sB = mat->B->nonzerostate; 872 873 if (diag != 0.0 && cong) { 874 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 875 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 876 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 877 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 878 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 879 PetscInt nnwA, nnwB; 880 PetscBool nnzA, nnzB; 881 882 nnwA = aijA->nonew; 883 nnwB = aijB->nonew; 884 nnzA = aijA->keepnonzeropattern; 885 nnzB = aijB->keepnonzeropattern; 886 if (!nnzA) { 887 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 888 aijA->nonew = 0; 889 } 890 if (!nnzB) { 891 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 892 aijB->nonew = 0; 893 } 894 /* Must zero here before the next loop */ 895 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 896 PetscCall(MatZeroRows(mat->B, len, lrows, 
0.0, NULL, NULL)); 897 for (r = 0; r < len; ++r) { 898 const PetscInt row = lrows[r] + A->rmap->rstart; 899 if (row >= A->cmap->N) continue; 900 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 901 } 902 aijA->nonew = nnwA; 903 aijB->nonew = nnwB; 904 } else { 905 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 906 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 907 } 908 PetscCall(PetscFree(lrows)); 909 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 910 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 911 912 /* reduce nonzerostate */ 913 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 914 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 915 if (gch) A->nonzerostate++; 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } 918 919 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 920 { 921 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 922 PetscMPIInt n = A->rmap->n; 923 PetscInt i, j, r, m, len = 0; 924 PetscInt *lrows, *owners = A->rmap->range; 925 PetscMPIInt p = 0; 926 PetscSFNode *rrows; 927 PetscSF sf; 928 const PetscScalar *xx; 929 PetscScalar *bb, *mask, *aij_a; 930 Vec xmask, lmask; 931 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 932 const PetscInt *aj, *ii, *ridx; 933 PetscScalar *aa; 934 935 PetscFunctionBegin; 936 /* Create SF where leaves are input rows and roots are owned rows */ 937 PetscCall(PetscMalloc1(n, &lrows)); 938 for (r = 0; r < n; ++r) lrows[r] = -1; 939 PetscCall(PetscMalloc1(N, &rrows)); 940 for (r = 0; r < N; ++r) { 941 const PetscInt idx = rows[r]; 942 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 943 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 944 
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask over ghost columns marking which global columns are zeroed */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  /* NOTE(review): the 'if (b)' branches below read xx, which is only set when
     x and b are both given — assumes callers pass x and b together; confirm */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* note: reuses n as the row length from here on */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A*x. Overlaps the scatter of ghost values with the diagonal-block product. */
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* start communication of ghost values, compute diagonal block meanwhile */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* x = D^{-1}-style product using only the local diagonal block (no communication). */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x, with the same communication/computation overlap as MatMult. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T*x: local transposes first, then a reverse scatter adds the
   off-process contributions into y. */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Test whether Bmat equals Amat^T to tolerance tol; the result is collective
   (identical on all ranks of the matrices' communicator). */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all column indices outside this rank's owned range */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symmetry test implemented as A == A^T. */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T*x, analogous to MatMultTranspose_MPIAIJ. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local
part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale every stored entry by aa (both blocks; no communication needed). */
PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write the parallel matrix to a binary viewer in the standard PETSc format:
   header, per-row lengths, global column indices, then values. Each row is
   emitted with its columns in ascending global order by interleaving the
   off-diagonal (B, via garray) and diagonal (A, shifted by cs) pieces. */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local nonzero count; summed over ranks below */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    /* clamp the global count if it does not fit a PetscInt */
    if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* B columns left of the diagonal block come first ... */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* ... then the diagonal block, shifted to global numbering ... */
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* ... then the remaining B columns right of the diagonal block */
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same interleaving as the column indices) */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* Dispatch viewing of the parallel matrix for ASCII, draw, binary, and socket
   viewers; collective over the matrix's communicator. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local storage and the scatter context */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Top-level MatView for MPIAIJ: forwards supported viewer types to the worker. */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel SOR: only the "local" sweep variants are supported; each outer
   iteration refreshes ghost values and folds the off-diagonal product into a
   modified right-hand side before the sequential sweep on the diagonal block. */
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 holds the rhs modified by the off-diagonal contribution; it is only
     needed when more than one iteration (or Eisenstat) will run */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* cache the diagonal lazily for the Eisenstat trick */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build B = P_r * A * P_c for row permutation rowp and column permutation colp.
   Uses PetscSFs to invert the permutations and to learn where this rank's rows,
   columns, and ghost columns land in the permuted matrix. */
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz,
*tonnz, *rdest, *cdest, *work, *gcdest; 1499 PetscSF rowsf, sf; 1500 IS parcolp = NULL; 1501 PetscBool done; 1502 1503 PetscFunctionBegin; 1504 PetscCall(MatGetLocalSize(A, &m, &n)); 1505 PetscCall(ISGetIndices(rowp, &rwant)); 1506 PetscCall(ISGetIndices(colp, &cwant)); 1507 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1508 1509 /* Invert row permutation to find out where my rows should go */ 1510 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1511 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1512 PetscCall(PetscSFSetFromOptions(rowsf)); 1513 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1514 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1515 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1516 1517 /* Invert column permutation to find out where my columns should go */ 1518 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1519 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1520 PetscCall(PetscSFSetFromOptions(sf)); 1521 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1522 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1523 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1524 PetscCall(PetscSFDestroy(&sf)); 1525 1526 PetscCall(ISRestoreIndices(rowp, &rwant)); 1527 PetscCall(ISRestoreIndices(colp, &cwant)); 1528 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1529 1530 /* Find out where my gcols should go */ 1531 PetscCall(MatGetSize(aB, NULL, &ng)); 1532 PetscCall(PetscMalloc1(ng, &gcdest)); 1533 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1534 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1535 PetscCall(PetscSFSetFromOptions(sf)); 1536 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1537 PetscCall(PetscSFBcastEnd(sf, 
MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1538 PetscCall(PetscSFDestroy(&sf)); 1539 1540 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1541 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1542 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1543 for (i = 0; i < m; i++) { 1544 PetscInt row = rdest[i]; 1545 PetscMPIInt rowner; 1546 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1547 for (j = ai[i]; j < ai[i + 1]; j++) { 1548 PetscInt col = cdest[aj[j]]; 1549 PetscMPIInt cowner; 1550 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1551 if (rowner == cowner) dnnz[i]++; 1552 else onnz[i]++; 1553 } 1554 for (j = bi[i]; j < bi[i + 1]; j++) { 1555 PetscInt col = gcdest[bj[j]]; 1556 PetscMPIInt cowner; 1557 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1558 if (rowner == cowner) dnnz[i]++; 1559 else onnz[i]++; 1560 } 1561 } 1562 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1563 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1564 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1565 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1566 PetscCall(PetscSFDestroy(&rowsf)); 1567 1568 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1569 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1570 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1571 for (i = 0; i < m; i++) { 1572 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1573 PetscInt j0, rowlen; 1574 rowlen = ai[i + 1] - ai[i]; 1575 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1576 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1577 PetscCall(MatSetValues(Aperm, 1, 
&rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the number of ghost (off-process) columns of the parallel matrix and,
   optionally, a pointer to their global column indices (the garray map of the
   off-diagonal block B). The returned array is owned by the matrix; do not free. */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  /* the column count of the compressed off-diagonal block B equals the number of ghosts */
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Collect matrix statistics (nonzeros used/allocated/unneeded, memory, mallocs)
   by summing the local diagonal (A) and off-diagonal (B) blocks, then reducing
   across ranks with MPI_MAX or MPI_SUM when a global view is requested. */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  /* accumulate the same statistics from the off-diagonal block */
  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Apply a MatOption to the parallel matrix; options that make sense per-block are
   forwarded to the sequential diagonal (a->A) and off-diagonal (a->B) blocks. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* forwarded verbatim to both sequential blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n",
MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return one locally owned row of the parallel matrix with global column indices,
   merging the diagonal block row (local columns, offset by cstart) and the
   off-diagonal block row (mapped through garray) into a single array sorted by
   increasing global column. Must be paired with MatRestoreRow_MPIAIJ(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time per matrix */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries whose global column precedes the diagonal block */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed in the values pass; compute it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the row obtained with MatGetRow_MPIAIJ(); only clears the active flag
   since the work arrays are cached on the matrix. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a matrix norm (Frobenius, 1, or infinity) of the parallel matrix by
   combining the local diagonal and off-diagonal blocks and reducing across ranks.
   The 2-norm is not supported. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-rank communicator: the diagonal block is the whole matrix */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* accumulate per-global-column absolute sums, then reduce over all ranks */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]]
+= PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose the parallel matrix. The diagonal block is transposed locally in place
   (fast path); the off-diagonal entries are scattered with MatSetValues() since their
   transposed locations live on other ranks. Preallocation for the result is computed
   with a PetscSF reduction of per-column counts. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* transposed matrix has the column layout of A as its row layout and vice versa */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed local column indices to global indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of B as column (rstart+i) of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute diag(ll)*mat*diag(rr). Left scaling is purely local; right scaling of the
   off-diagonal block needs the ghost values of rr, which are scattered while the
   local blocks are being scaled. Either vector may be NULL to skip that side. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clear the factored state; only the diagonal block carries factorization state here. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Test whether two parallel matrices are equal: compare the local diagonal and
   off-diagonal blocks, then AND the results over all ranks. */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  /* all ranks must agree for the matrices to be equal */
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B, using the fast block-wise path only when both matrices share the
   same nonzero pattern and copy implementation. */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure. Performs a merge of the two sorted global
   column lists of each row, counting union elements (duplicates counted once).
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x =
(Mat_SeqAIJ *)X->data; 2090 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2091 2092 PetscFunctionBegin; 2093 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2094 PetscFunctionReturn(PETSC_SUCCESS); 2095 } 2096 2097 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2098 { 2099 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2100 2101 PetscFunctionBegin; 2102 if (str == SAME_NONZERO_PATTERN) { 2103 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2104 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2105 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2106 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2107 } else { 2108 Mat B; 2109 PetscInt *nnz_d, *nnz_o; 2110 2111 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2112 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2113 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2114 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2115 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2116 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2117 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2118 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2119 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2120 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2121 PetscCall(MatHeaderMerge(Y, &B)); 2122 PetscCall(PetscFree(nnz_d)); 2123 PetscCall(PetscFree(nnz_o)); 2124 } 2125 PetscFunctionReturn(PETSC_SUCCESS); 2126 } 2127 2128 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2129 2130 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2131 { 2132 PetscFunctionBegin; 2133 if (PetscDefined(USE_COMPLEX)) { 2134 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2135 2136 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2137 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2138 } 2139 PetscFunctionReturn(PETSC_SUCCESS); 2140 } 2141 2142 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 2143 { 2144 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2145 2146 PetscFunctionBegin; 2147 PetscCall(MatRealPart(a->A)); 2148 PetscCall(MatRealPart(a->B)); 2149 PetscFunctionReturn(PETSC_SUCCESS); 2150 } 2151 2152 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2153 { 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2155 2156 PetscFunctionBegin; 2157 PetscCall(MatImaginaryPart(a->A)); 2158 PetscCall(MatImaginaryPart(a->B)); 2159 PetscFunctionReturn(PETSC_SUCCESS); 2160 } 2161 2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2163 { 2164 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2165 PetscInt i, *idxb = NULL, m = A->rmap->n; 2166 PetscScalar *va, *vv; 2167 Vec vB, vA; 2168 const PetscScalar *vb; 2169 2170 PetscFunctionBegin; 2171 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2172 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2173 2174 PetscCall(VecGetArrayWrite(vA, &va)); 2175 if (idx) { 2176 for (i = 0; i < m; i++) { 2177 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2178 } 2179 } 2180 2181 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2182 PetscCall(PetscMalloc1(m, &idxb)); 2183 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2184 2185 PetscCall(VecGetArrayWrite(v, &vv)); 2186 PetscCall(VecGetArrayRead(vB, &vb)); 2187 for (i = 0; i < m; i++) { 2188 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2189 vv[i] = vb[i]; 2190 if (idx) idx[i] = a->garray[idxb[i]]; 2191 } else { 2192 vv[i] = va[i]; 2193 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2194 } 2195 } 2196 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2197 PetscCall(VecRestoreArrayWrite(vA, &va)); 2198 PetscCall(VecRestoreArrayRead(vB, &vb)); 2199 PetscCall(PetscFree(idxb)); 2200 PetscCall(VecDestroy(&vA)); 2201 PetscCall(VecDestroy(&vB)); 2202 PetscFunctionReturn(PETSC_SUCCESS); 2203 } 2204 2205 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2206 { 2207 
Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* rank owns no columns: every row has only implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan stored off-diagonal entries of this row for a smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results; ties prefer the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* For each local row, return the minimum entry (by real part) and optionally its
   global column index, accounting for implicit zeros in the compressed off-diagonal
   block (a missing off-diagonal entry is an implicit 0.0 that can be the minimum). */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* For each local row, return the maximum entry (by real part) and optionally its
   global column index; mirror image of MatGetRowMin_MPIAIJ including the handling
   of implicit zeros in the compressed off-diagonal block. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the (sequential) nonzero structure of the whole parallel matrix on each
   rank, without gathering the numerical values. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* The (point-)block diagonal lives entirely in the local diagonal block a->A */
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype; /* surface any singular-block error on the parallel matrix */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix with random values; on an unassembled (preallocated-only) matrix the
   off-diagonal block must not receive entries in the local column range */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* keep B's columns out of [cstart, cend): those belong to the diagonal block */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the overlap op */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* i[rmap->n] is the CSR row-pointer tail, i.e. the local nonzero count of each block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* no-op on non-MPIAIJ types (PetscTryMethod) */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process -mat_increase_overlap_scalable from the options database */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; /* default reflects the current op */
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  /* Y += a*I; make sure the diagonal block can hold one entry per row before shifting */
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the new-nonzero policy clobbered by preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any local diagonal entry is structurally missing; *d is returned in global numbering */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* local row index -> global row index */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert variable-sized diagonal blocks; they all lie inside the local diagonal part a->A */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop stored zeros from both blocks; only the diagonal block may keep zero diagonal entries */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Operation table for MATMPIAIJ; entries are positional (see MatOps), index comments mark every 5th slot */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};

/* Stash a copy of the current values of both blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously stashed by MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) sequential blocks,
   discarding any previous column map, ghost vector, and scatter */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-table insertion mode: restore the cached op table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* discard any stale off-process column map and communication objects */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* uniprocessor case: B has no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset both blocks to their preallocated (empty) state, dropping the off-process column
   map and communication objects so they are rebuilt at the next assembly */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate a MATMPIAIJ, copying layouts, the column map, the ghost array, the scatter,
   and (per cpvalues) the values of both sequential blocks */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-call MatGetRow() workspace is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 keeps the allocation non-NULL even when len == 0 */
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch loading by viewer type: PETSc binary or (if built) HDF5 */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a matrix stored in PETSc binary format: header, per-row counts, column indices, values */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; /* prefix-sum the row lengths into CSR row pointers */
  if (nz != PETSC_MAX_INT) {                            /* nz == PETSC_MAX_INT marks an unknown count; skip the consistency check */
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1; /* this rank's part of iscol is exactly its column range */
  }

  /* collective decision: only skip the gather when EVERY rank matched (MPI_MIN of 0/1 flags) */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols; /* exclusive prefix sum: index of this rank's first selected column in the global iscol */
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i]; /* mark selected columns (>-1) */
    cmaparray[is_idx[i] - cstart] = i + isstart;            /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart;     /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* global row -> local row */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* this ghost column was selected by some rank's iscol */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      /* merge-walk: both subgarray and garray are sorted global column lists */
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Top-level MATMPIAIJ submatrix extraction: picks the fastest applicable path (same row+col
   distribution, same row distribution, or the general non-scalable gather) */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects on *newmat record which path built it the first time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* collective agreement: a path is taken only if it applies on every rank */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local falls through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3452 3453 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3454 @*/ 3455 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3456 { 3457 Mat_MPIAIJ *maij; 3458 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3459 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3460 const PetscScalar *oa; 3461 Mat Bnew; 3462 PetscInt m, n, N; 3463 MatType mpi_mat_type; 3464 3465 PetscFunctionBegin; 3466 PetscCall(MatCreate(comm, mat)); 3467 PetscCall(MatGetSize(A, &m, &n)); 3468 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3469 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3470 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3471 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3472 3473 /* Get global columns of mat */ 3474 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3475 3476 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3477 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3478 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3479 PetscCall(MatSetType(*mat, mpi_mat_type)); 3480 3481 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3482 maij = (Mat_MPIAIJ *)(*mat)->data; 3483 3484 (*mat)->preallocated = PETSC_TRUE; 3485 3486 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3487 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3488 3489 /* Set A as diagonal portion of *mat */ 3490 maij->A = A; 3491 3492 nz = oi[m]; 3493 for (i = 0; i < nz; i++) { 3494 col = oj[i]; 3495 oj[i] = garray[col]; 3496 } 3497 3498 /* Set Bnew as off-diagonal portion of *mat */ 3499 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3500 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3501 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3502 bnew = (Mat_SeqAIJ *)Bnew->data; 3503 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3504 maij->B = Bnew; 3505 3506 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3507 3508 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3509 b->free_a = PETSC_FALSE; 3510 b->free_ij = PETSC_FALSE; 3511 PetscCall(MatDestroy(&B)); 3512 3513 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3514 bnew->free_a = PETSC_TRUE; 3515 bnew->free_ij = PETSC_TRUE; 3516 3517 /* condense columns of maij->B */ 3518 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3519 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3520 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3521 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3522 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3523 PetscFunctionReturn(PETSC_SUCCESS); 3524 } 3525 3526 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3527 3528 PetscErrorCode 
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3529 { 3530 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3531 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3532 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3533 Mat M, Msub, B = a->B; 3534 MatScalar *aa; 3535 Mat_SeqAIJ *aij; 3536 PetscInt *garray = a->garray, *colsub, Ncols; 3537 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3538 IS iscol_sub, iscmap; 3539 const PetscInt *is_idx, *cmap; 3540 PetscBool allcolumns = PETSC_FALSE; 3541 MPI_Comm comm; 3542 3543 PetscFunctionBegin; 3544 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3545 if (call == MAT_REUSE_MATRIX) { 3546 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3547 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3548 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3549 3550 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3551 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3552 3553 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3554 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3555 3556 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3557 3558 } else { /* call == MAT_INITIAL_MATRIX) */ 3559 PetscBool flg; 3560 3561 PetscCall(ISGetLocalSize(iscol, &n)); 3562 PetscCall(ISGetSize(iscol, &Ncols)); 3563 3564 /* (1) iscol -> nonscalable iscol_local */ 3565 /* Check for special case: each processor gets entire matrix columns */ 3566 PetscCall(ISIdentity(iscol_local, &flg)); 3567 if (flg && n == mat->cmap->N) allcolumns = 
PETSC_TRUE; 3568 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3569 if (allcolumns) { 3570 iscol_sub = iscol_local; 3571 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3572 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3573 3574 } else { 3575 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3576 PetscInt *idx, *cmap1, k; 3577 PetscCall(PetscMalloc1(Ncols, &idx)); 3578 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3579 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3580 count = 0; 3581 k = 0; 3582 for (i = 0; i < Ncols; i++) { 3583 j = is_idx[i]; 3584 if (j >= cstart && j < cend) { 3585 /* diagonal part of mat */ 3586 idx[count] = j; 3587 cmap1[count++] = i; /* column index in submat */ 3588 } else if (Bn) { 3589 /* off-diagonal part of mat */ 3590 if (j == garray[k]) { 3591 idx[count] = j; 3592 cmap1[count++] = i; /* column index in submat */ 3593 } else if (j > garray[k]) { 3594 while (j > garray[k] && k < Bn - 1) k++; 3595 if (j == garray[k]) { 3596 idx[count] = j; 3597 cmap1[count++] = i; /* column index in submat */ 3598 } 3599 } 3600 } 3601 } 3602 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3603 3604 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3605 PetscCall(ISGetBlockSize(iscol, &cbs)); 3606 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3607 3608 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3609 } 3610 3611 /* (3) Create sequential Msub */ 3612 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3613 } 3614 3615 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3616 aij = (Mat_SeqAIJ *)(Msub)->data; 3617 ii = aij->i; 3618 PetscCall(ISGetIndices(iscmap, &cmap)); 3619 3620 /* 3621 m - number of local rows 3622 Ncols 
- number of columns (same on all processors) 3623 rstart - first row in new global matrix generated 3624 */ 3625 PetscCall(MatGetSize(Msub, &m, NULL)); 3626 3627 if (call == MAT_INITIAL_MATRIX) { 3628 /* (4) Create parallel newmat */ 3629 PetscMPIInt rank, size; 3630 PetscInt csize; 3631 3632 PetscCallMPI(MPI_Comm_size(comm, &size)); 3633 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3634 3635 /* 3636 Determine the number of non-zeros in the diagonal and off-diagonal 3637 portions of the matrix in order to do correct preallocation 3638 */ 3639 3640 /* first get start and end of "diagonal" columns */ 3641 PetscCall(ISGetLocalSize(iscol, &csize)); 3642 if (csize == PETSC_DECIDE) { 3643 PetscCall(ISGetSize(isrow, &mglobal)); 3644 if (mglobal == Ncols) { /* square matrix */ 3645 nlocal = m; 3646 } else { 3647 nlocal = Ncols / size + ((Ncols % size) > rank); 3648 } 3649 } else { 3650 nlocal = csize; 3651 } 3652 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3653 rstart = rend - nlocal; 3654 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3655 3656 /* next, compute all the lengths */ 3657 jj = aij->j; 3658 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3659 olens = dlens + m; 3660 for (i = 0; i < m; i++) { 3661 jend = ii[i + 1] - ii[i]; 3662 olen = 0; 3663 dlen = 0; 3664 for (j = 0; j < jend; j++) { 3665 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3666 else dlen++; 3667 jj++; 3668 } 3669 olens[i] = olen; 3670 dlens[i] = dlen; 3671 } 3672 3673 PetscCall(ISGetBlockSize(isrow, &bs)); 3674 PetscCall(ISGetBlockSize(iscol, &cbs)); 3675 3676 PetscCall(MatCreate(comm, &M)); 3677 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3678 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3679 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3680 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3681 
PetscCall(PetscFree(dlens)); 3682 3683 } else { /* call == MAT_REUSE_MATRIX */ 3684 M = *newmat; 3685 PetscCall(MatGetLocalSize(M, &i, NULL)); 3686 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3687 PetscCall(MatZeroEntries(M)); 3688 /* 3689 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3690 rather than the slower MatSetValues(). 3691 */ 3692 M->was_assembled = PETSC_TRUE; 3693 M->assembled = PETSC_FALSE; 3694 } 3695 3696 /* (5) Set values of Msub to *newmat */ 3697 PetscCall(PetscMalloc1(count, &colsub)); 3698 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3699 3700 jj = aij->j; 3701 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3702 for (i = 0; i < m; i++) { 3703 row = rstart + i; 3704 nz = ii[i + 1] - ii[i]; 3705 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3706 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3707 jj += nz; 3708 aa += nz; 3709 } 3710 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3711 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3712 3713 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3714 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3715 3716 PetscCall(PetscFree(colsub)); 3717 3718 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3719 if (call == MAT_INITIAL_MATRIX) { 3720 *newmat = M; 3721 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3722 PetscCall(MatDestroy(&Msub)); 3723 3724 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3725 PetscCall(ISDestroy(&iscol_sub)); 3726 3727 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3728 PetscCall(ISDestroy(&iscmap)); 3729 3730 if (iscol_local) { 3731 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3732 
PetscCall(ISDestroy(&iscol_local)); 3733 } 3734 } 3735 PetscFunctionReturn(PETSC_SUCCESS); 3736 } 3737 3738 /* 3739 Not great since it makes two copies of the submatrix, first an SeqAIJ 3740 in local and then by concatenating the local matrices the end result. 3741 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3742 3743 This requires a sequential iscol with all indices. 3744 */ 3745 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3746 { 3747 PetscMPIInt rank, size; 3748 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3749 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3750 Mat M, Mreuse; 3751 MatScalar *aa, *vwork; 3752 MPI_Comm comm; 3753 Mat_SeqAIJ *aij; 3754 PetscBool colflag, allcolumns = PETSC_FALSE; 3755 3756 PetscFunctionBegin; 3757 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3758 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3759 PetscCallMPI(MPI_Comm_size(comm, &size)); 3760 3761 /* Check for special case: each processor gets entire matrix columns */ 3762 PetscCall(ISIdentity(iscol, &colflag)); 3763 PetscCall(ISGetLocalSize(iscol, &n)); 3764 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3765 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3766 3767 if (call == MAT_REUSE_MATRIX) { 3768 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3769 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3770 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3771 } else { 3772 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3773 } 3774 3775 /* 3776 m - number of local rows 3777 n - number of columns (same on 
all processors) 3778 rstart - first row in new global matrix generated 3779 */ 3780 PetscCall(MatGetSize(Mreuse, &m, &n)); 3781 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3782 if (call == MAT_INITIAL_MATRIX) { 3783 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3784 ii = aij->i; 3785 jj = aij->j; 3786 3787 /* 3788 Determine the number of non-zeros in the diagonal and off-diagonal 3789 portions of the matrix in order to do correct preallocation 3790 */ 3791 3792 /* first get start and end of "diagonal" columns */ 3793 if (csize == PETSC_DECIDE) { 3794 PetscCall(ISGetSize(isrow, &mglobal)); 3795 if (mglobal == n) { /* square matrix */ 3796 nlocal = m; 3797 } else { 3798 nlocal = n / size + ((n % size) > rank); 3799 } 3800 } else { 3801 nlocal = csize; 3802 } 3803 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3804 rstart = rend - nlocal; 3805 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3806 3807 /* next, compute all the lengths */ 3808 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3809 olens = dlens + m; 3810 for (i = 0; i < m; i++) { 3811 jend = ii[i + 1] - ii[i]; 3812 olen = 0; 3813 dlen = 0; 3814 for (j = 0; j < jend; j++) { 3815 if (*jj < rstart || *jj >= rend) olen++; 3816 else dlen++; 3817 jj++; 3818 } 3819 olens[i] = olen; 3820 dlens[i] = dlen; 3821 } 3822 PetscCall(MatCreate(comm, &M)); 3823 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3824 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3825 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3826 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3827 PetscCall(PetscFree(dlens)); 3828 } else { 3829 PetscInt ml, nl; 3830 3831 M = *newmat; 3832 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3833 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3834 PetscCall(MatZeroEntries(M)); 3835 /* 
3836 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3837 rather than the slower MatSetValues(). 3838 */ 3839 M->was_assembled = PETSC_TRUE; 3840 M->assembled = PETSC_FALSE; 3841 } 3842 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3843 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3844 ii = aij->i; 3845 jj = aij->j; 3846 3847 /* trigger copy to CPU if needed */ 3848 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3849 for (i = 0; i < m; i++) { 3850 row = rstart + i; 3851 nz = ii[i + 1] - ii[i]; 3852 cwork = jj; 3853 jj += nz; 3854 vwork = aa; 3855 aa += nz; 3856 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3857 } 3858 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3859 3860 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3861 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3862 *newmat = M; 3863 3864 /* save submatrix used in processor for next request */ 3865 if (call == MAT_INITIAL_MATRIX) { 3866 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3867 PetscCall(MatDestroy(&Mreuse)); 3868 } 3869 PetscFunctionReturn(PETSC_SUCCESS); 3870 } 3871 3872 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3873 { 3874 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3875 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3876 const PetscInt *JJ; 3877 PetscBool nooffprocentries; 3878 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3879 3880 PetscFunctionBegin; 3881 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3882 3883 PetscCall(PetscLayoutSetUp(B->rmap)); 3884 PetscCall(PetscLayoutSetUp(B->cmap)); 3885 m = B->rmap->n; 3886 cstart = B->cmap->rstart; 3887 cend = B->cmap->rend; 3888 rstart = B->rmap->rstart; 3889 3890 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3891 3892 if (PetscDefined(USE_DEBUG)) { 
3893 for (i = 0; i < m; i++) { 3894 nnz = Ii[i + 1] - Ii[i]; 3895 JJ = J + Ii[i]; 3896 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3897 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3898 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3899 } 3900 } 3901 3902 for (i = 0; i < m; i++) { 3903 nnz = Ii[i + 1] - Ii[i]; 3904 JJ = J + Ii[i]; 3905 nnz_max = PetscMax(nnz_max, nnz); 3906 d = 0; 3907 for (j = 0; j < nnz; j++) { 3908 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3909 } 3910 d_nnz[i] = d; 3911 o_nnz[i] = nnz - d; 3912 } 3913 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3914 PetscCall(PetscFree2(d_nnz, o_nnz)); 3915 3916 for (i = 0; i < m; i++) { 3917 ii = i + rstart; 3918 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3919 } 3920 nooffprocentries = B->nooffprocentries; 3921 B->nooffprocentries = PETSC_TRUE; 3922 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3923 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3924 B->nooffprocentries = nooffprocentries; 3925 3926 /* count number of entries below block diagonal */ 3927 PetscCall(PetscFree(Aij->ld)); 3928 PetscCall(PetscCalloc1(m, &ld)); 3929 Aij->ld = ld; 3930 for (i = 0; i < m; i++) { 3931 nnz = Ii[i + 1] - Ii[i]; 3932 j = 0; 3933 while (j < nnz && J[j] < cstart) j++; 3934 ld[i] = j; 3935 J += nnz; 3936 } 3937 3938 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3939 PetscFunctionReturn(PETSC_SUCCESS); 3940 } 3941 3942 /*@ 3943 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3944 (the default parallel PETSc format). 3945 3946 Collective 3947 3948 Input Parameters: 3949 + B - the matrix 3950 . i - the indices into j for the start of each local row (starts with zero) 3951 . j - the column indices for each local row (starts with zero) 3952 - v - optional values in the matrix 3953 3954 Level: developer 3955 3956 Notes: 3957 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3958 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3959 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3960 3961 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3962 3963 The format which is used for the sparse matrix input, is equivalent to a 3964 row-major ordering.. 
i.e for the following matrix, the input data expected is 3965 as shown 3966 3967 .vb 3968 1 0 0 3969 2 0 3 P0 3970 ------- 3971 4 5 6 P1 3972 3973 Process0 [P0] rows_owned=[0,1] 3974 i = {0,1,3} [size = nrow+1 = 2+1] 3975 j = {0,0,2} [size = 3] 3976 v = {1,2,3} [size = 3] 3977 3978 Process1 [P1] rows_owned=[2] 3979 i = {0,3} [size = nrow+1 = 1+1] 3980 j = {0,1,2} [size = 3] 3981 v = {4,5,6} [size = 3] 3982 .ve 3983 3984 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 3985 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3986 @*/ 3987 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3988 { 3989 PetscFunctionBegin; 3990 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 3991 PetscFunctionReturn(PETSC_SUCCESS); 3992 } 3993 3994 /*@C 3995 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 3996 (the default parallel PETSc format). For good matrix assembly performance 3997 the user should preallocate the matrix storage by setting the parameters 3998 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 3999 4000 Collective 4001 4002 Input Parameters: 4003 + B - the matrix 4004 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4005 (same value is used for all local rows) 4006 . d_nnz - array containing the number of nonzeros in the various rows of the 4007 DIAGONAL portion of the local submatrix (possibly different for each row) 4008 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4009 The size of this array is equal to the number of local rows, i.e 'm'. 4010 For matrices that will be factored, you must leave room for (and set) 4011 the diagonal entry even if it is zero. 4012 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4013 submatrix (same value is used for all local rows). 4014 - o_nnz - array containing the number of nonzeros in the various rows of the 4015 OFF-DIAGONAL portion of the local submatrix (possibly different for 4016 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4017 structure. The size of this array is equal to the number 4018 of local rows, i.e 'm'. 4019 4020 Example Usage: 4021 Consider the following 8x8 matrix with 34 non-zero values, that is 4022 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4023 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4024 as follows 4025 4026 .vb 4027 1 2 0 | 0 3 0 | 0 4 4028 Proc0 0 5 6 | 7 0 0 | 8 0 4029 9 0 10 | 11 0 0 | 12 0 4030 ------------------------------------- 4031 13 0 14 | 15 16 17 | 0 0 4032 Proc1 0 18 0 | 19 20 21 | 0 0 4033 0 0 0 | 22 23 0 | 24 0 4034 ------------------------------------- 4035 Proc2 25 26 27 | 0 0 28 | 29 0 4036 30 0 0 | 31 32 33 | 0 34 4037 .ve 4038 4039 This can be represented as a collection of submatrices as 4040 .vb 4041 A B C 4042 D E F 4043 G H I 4044 .ve 4045 4046 Where the submatrices A,B,C are owned by proc0, D,E,F are 4047 owned by proc1, G,H,I are owned by proc2. 4048 4049 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4050 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4051 The 'M','N' parameters are 8,8, and have the same values on all procs. 4052 4053 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4054 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4055 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4056 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4057 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4058 matrix, ans [DF] as another `MATSEQAIJ` matrix. 
4059 4060 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4061 allocated for every row of the local diagonal submatrix, and `o_nz` 4062 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4063 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4064 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4065 In this case, the values of `d_nz`, `o_nz` are 4066 .vb 4067 proc0 dnz = 2, o_nz = 2 4068 proc1 dnz = 3, o_nz = 2 4069 proc2 dnz = 1, o_nz = 4 4070 .ve 4071 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4072 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4073 for proc3. i.e we are using 12+15+10=37 storage locations to store 4074 34 values. 4075 4076 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4077 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4078 In the above case the values for `d_nnz`, `o_nnz` are 4079 .vb 4080 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4081 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4082 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4083 .ve 4084 Here the space allocated is sum of all the above values i.e 34, and 4085 hence pre-allocation is perfect. 4086 4087 Level: intermediate 4088 4089 Notes: 4090 If the *_nnz parameter is given then the *_nz parameter is ignored 4091 4092 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4093 storage. The stored row and column indices begin with zero. 4094 See [Sparse Matrices](sec_matsparse) for details. 4095 4096 The parallel matrix is partitioned such that the first m0 rows belong to 4097 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4098 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4099 4100 The DIAGONAL portion of the local submatrix of a processor can be defined 4101 as the submatrix which is obtained by extraction the part corresponding to 4102 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4103 first row that belongs to the processor, r2 is the last row belonging to 4104 the this processor, and c1-c2 is range of indices of the local part of a 4105 vector suitable for applying the matrix to. This is an mxn matrix. In the 4106 common case of a square matrix, the row and column ranges are the same and 4107 the DIAGONAL part is also square. The remaining portion of the local 4108 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4109 4110 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4111 4112 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4113 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4114 You can also run with the option `-info` and look for messages with the string 4115 malloc in them to see if additional memory allocation was needed. 4116 4117 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4118 `MatGetInfo()`, `PetscSplitOwnership()` 4119 @*/ 4120 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4121 { 4122 PetscFunctionBegin; 4123 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4124 PetscValidType(B, 1); 4125 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4126 PetscFunctionReturn(PETSC_SUCCESS); 4127 } 4128 4129 /*@ 4130 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4131 CSR format for the local rows. 

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices
- a - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* the CSR arrays are copied, not referenced, by the preallocation routine */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the matrix in standard
  CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical to what was passed
  from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J - column indices
- v - matrix values

  Level: deprecated

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;
  PetscInt       *ld  = Aij->ld;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* Each input row of v holds, in column order: ld[i] off-diagonal entries that precede the
       diagonal block, then md diagonal-block entries, then the remaining off-diagonal entries.
       Scatter them into the diagonal (A) and off-diagonal (B) value arrays accordingly. */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into the blocks; suppress off-process communication during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v - matrix values, stored by row

  Level: intermediate

  Note:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i;
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0;
  for (i = 0; i < m; i++) {
    /* Unlike MatUpdateMPIAIJWithArrays(), the row length is recovered from the stored row
       pointers of the diagonal (A) and off-diagonal (B) blocks, not from user-supplied indices.
       NOTE: Adj points at the off-diagonal block's row pointers (Ao->i), despite its name. */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
        See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2 etc.. where
  m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitute the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
   proc0 dnz = 2, o_nz = 2
   proc1 dnz = 3, o_nz = 2
   proc2 dnz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are
.vb
   proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
   proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
   proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    /* single-rank communicator: a sequential matrix is returned, so only the
       "diagonal" preallocation parameters apply */
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix

  Synopsis:
  MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameter:
. A - the `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - the diagonal portion of the matrix
. Ao - the off diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr - error code

  Level: advanced

  Note:
  Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
M*/

/*MC
  MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`

  Synopsis:
  MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameters:
+ A - the `MATMPIAIJ` matrix
. Ad - the diagonal portion of the matrix
. Ao - the off diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr - error code

  Level: advanced

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
M*/

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - The local diagonal block as a `MATSEQAIJ` matrix
. Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`.
The array colmap maps these 4580 local column numbers to global column numbers in the original matrix. 4581 4582 Fortran Notes: 4583 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4584 4585 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4586 @*/ 4587 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4588 { 4589 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4590 PetscBool flg; 4591 4592 PetscFunctionBegin; 4593 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4594 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4595 if (Ad) *Ad = a->A; 4596 if (Ao) *Ao = a->B; 4597 if (colmap) *colmap = a->garray; 4598 PetscFunctionReturn(PETSC_SUCCESS); 4599 } 4600 4601 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4602 { 4603 PetscInt m, N, i, rstart, nnz, Ii; 4604 PetscInt *indx; 4605 PetscScalar *values; 4606 MatType rootType; 4607 4608 PetscFunctionBegin; 4609 PetscCall(MatGetSize(inmat, &m, &N)); 4610 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4611 PetscInt *dnz, *onz, sum, bs, cbs; 4612 4613 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4614 /* Check sum(n) = N */ 4615 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4616 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4617 4618 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4619 rstart -= m; 4620 4621 MatPreallocateBegin(comm, m, n, dnz, onz); 4622 for (i = 0; i < m; i++) { 4623 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4624 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4625 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4626 } 4627 4628 PetscCall(MatCreate(comm, outmat)); 4629 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4630 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4631 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4632 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4633 PetscCall(MatSetType(*outmat, rootType)); 4634 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4635 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4636 MatPreallocateEnd(dnz, onz); 4637 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4638 } 4639 4640 /* numeric phase */ 4641 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4642 for (i = 0; i < m; i++) { 4643 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4644 Ii = i + rstart; 4645 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4646 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4647 } 4648 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4649 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4650 PetscFunctionReturn(PETSC_SUCCESS); 4651 } 4652 4653 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4654 { 4655 PetscMPIInt rank; 4656 PetscInt m, N, i, rstart, nnz; 4657 size_t len; 4658 const PetscInt *indx; 4659 PetscViewer out; 4660 char *name; 4661 Mat B; 4662 const PetscScalar *values; 4663 4664 PetscFunctionBegin; 4665 PetscCall(MatGetLocalSize(A, &m, NULL)); 4666 PetscCall(MatGetSize(A, NULL, &N)); 4667 /* Should this be the type of the diagonal block of A? 
*/ 4668 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4669 PetscCall(MatSetSizes(B, m, N, m, N)); 4670 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4671 PetscCall(MatSetType(B, MATSEQAIJ)); 4672 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4673 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4674 for (i = 0; i < m; i++) { 4675 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4676 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4677 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4678 } 4679 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4680 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4681 4682 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4683 PetscCall(PetscStrlen(outfile, &len)); 4684 PetscCall(PetscMalloc1(len + 6, &name)); 4685 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4686 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4687 PetscCall(PetscFree(name)); 4688 PetscCall(MatView(B, out)); 4689 PetscCall(PetscViewerDestroy(&out)); 4690 PetscCall(MatDestroy(&B)); 4691 PetscFunctionReturn(PETSC_SUCCESS); 4692 } 4693 4694 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4695 { 4696 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4697 4698 PetscFunctionBegin; 4699 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4700 PetscCall(PetscFree(merge->id_r)); 4701 PetscCall(PetscFree(merge->len_s)); 4702 PetscCall(PetscFree(merge->len_r)); 4703 PetscCall(PetscFree(merge->bi)); 4704 PetscCall(PetscFree(merge->bj)); 4705 PetscCall(PetscFree(merge->buf_ri[0])); 4706 PetscCall(PetscFree(merge->buf_ri)); 4707 PetscCall(PetscFree(merge->buf_rj[0])); 4708 PetscCall(PetscFree(merge->buf_rj)); 4709 PetscCall(PetscFree(merge->coi)); 4710 PetscCall(PetscFree(merge->coj)); 4711 PetscCall(PetscFree(merge->owners_co)); 4712 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4713 PetscCall(PetscFree(merge)); 4714 
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of merging per-rank sequential matrices into the parallel matrix mpimat,
   whose structure and communication pattern were set up by MatCreateMPIAIJSumSeqAIJSymbolic()
   (retrieved here from the "MatMergeSeqsToMPI" composed container) */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of all rows of seqmat owned (in the merged layout) by [proc] in one message */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* j scans the merged row's (superset) column list; nextaj advances only on a column match,
       and the loop terminates once all anzi local entries have been placed */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4872 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4873 PetscCall(PetscMalloc1(size, &len_si)); 4874 PetscCall(PetscMalloc1(size, &merge->len_s)); 4875 4876 m = merge->rowmap->n; 4877 owners = merge->rowmap->range; 4878 4879 /* determine the number of messages to send, their lengths */ 4880 len_s = merge->len_s; 4881 4882 len = 0; /* length of buf_si[] */ 4883 merge->nsend = 0; 4884 for (proc = 0; proc < size; proc++) { 4885 len_si[proc] = 0; 4886 if (proc == rank) { 4887 len_s[proc] = 0; 4888 } else { 4889 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4890 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4891 } 4892 if (len_s[proc]) { 4893 merge->nsend++; 4894 nrows = 0; 4895 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4896 if (ai[i + 1] > ai[i]) nrows++; 4897 } 4898 len_si[proc] = 2 * (nrows + 1); 4899 len += len_si[proc]; 4900 } 4901 } 4902 4903 /* determine the number and length of messages to receive for ij-structure */ 4904 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4905 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4906 4907 /* post the Irecv of j-structure */ 4908 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4909 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4910 4911 /* post the Isend of j-structure */ 4912 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4913 4914 for (proc = 0, k = 0; proc < size; proc++) { 4915 if (!len_s[proc]) continue; 4916 i = owners[proc]; 4917 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4918 k++; 4919 } 4920 4921 /* receives and sends of j-structure are complete */ 4922 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4923 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4924 4925 /* send and recv i-structure */ 4926 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4927 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4928 4929 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4930 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4931 for (proc = 0, k = 0; proc < size; proc++) { 4932 if (!len_s[proc]) continue; 4933 /* form outgoing message for i-structure: 4934 buf_si[0]: nrows to be sent 4935 [1:nrows]: row index (global) 4936 [nrows+1:2*nrows+1]: i-structure index 4937 */ 4938 nrows = len_si[proc] / 2 - 1; 4939 buf_si_i = buf_si + nrows + 1; 4940 buf_si[0] = nrows; 4941 buf_si_i[0] = 0; 4942 nrows = 0; 4943 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4944 anzi = ai[i + 1] - ai[i]; 4945 if (anzi) { 4946 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4947 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4948 nrows++; 4949 } 4950 } 4951 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4952 k++; 4953 buf_si += len_si[proc]; 4954 } 4955 4956 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4957 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4958 4959 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4960 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4961 4962 PetscCall(PetscFree(len_si)); 4963 PetscCall(PetscFree(len_ri)); 4964 PetscCall(PetscFree(rj_waits)); 4965 PetscCall(PetscFree2(si_waits, sj_waits)); 4966 PetscCall(PetscFree(ri_waits)); 4967 PetscCall(PetscFree(buf_s)); 4968 PetscCall(PetscFree(status)); 4969 4970 /* compute a local seq matrix in each processor */ 4971 /* allocate bi array and free space for accumulating nonzero column info */ 4972 
PetscCall(PetscMalloc1(m + 1, &bi)); 4973 bi[0] = 0; 4974 4975 /* create and initialize a linked list */ 4976 nlnk = N + 1; 4977 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4978 4979 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4980 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4981 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4982 4983 current_space = free_space; 4984 4985 /* determine symbolic info for each local row */ 4986 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4987 4988 for (k = 0; k < merge->nrecv; k++) { 4989 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4990 nrows = *buf_ri_k[k]; 4991 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4992 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4993 } 4994 4995 MatPreallocateBegin(comm, m, n, dnz, onz); 4996 len = 0; 4997 for (i = 0; i < m; i++) { 4998 bnzi = 0; 4999 /* add local non-zero cols of this proc's seqmat into lnk */ 5000 arow = owners[rank] + i; 5001 anzi = ai[arow + 1] - ai[arow]; 5002 aj = a->j + ai[arow]; 5003 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5004 bnzi += nlnk; 5005 /* add received col data into lnk */ 5006 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5007 if (i == *nextrow[k]) { /* i-th row */ 5008 anzi = *(nextai[k] + 1) - *nextai[k]; 5009 aj = buf_rj[k] + *nextai[k]; 5010 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5011 bnzi += nlnk; 5012 nextrow[k]++; 5013 nextai[k]++; 5014 } 5015 } 5016 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5017 5018 /* if free space is not available, make more free space */ 5019 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5020 /* copy data into free space, then initialize lnk */ 5021 
PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5022 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5023 5024 current_space->array += bnzi; 5025 current_space->local_used += bnzi; 5026 current_space->local_remaining -= bnzi; 5027 5028 bi[i + 1] = bi[i] + bnzi; 5029 } 5030 5031 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5032 5033 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5034 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5035 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5036 5037 /* create symbolic parallel matrix B_mpi */ 5038 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5039 PetscCall(MatCreate(comm, &B_mpi)); 5040 if (n == PETSC_DECIDE) { 5041 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5042 } else { 5043 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5044 } 5045 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5046 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5047 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5048 MatPreallocateEnd(dnz, onz); 5049 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5050 5051 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5052 B_mpi->assembled = PETSC_FALSE; 5053 merge->bi = bi; 5054 merge->bj = bj; 5055 merge->buf_ri = buf_ri; 5056 merge->buf_rj = buf_rj; 5057 merge->coi = NULL; 5058 merge->coj = NULL; 5059 merge->owners_co = NULL; 5060 5061 PetscCall(PetscCommDestroy(&comm)); 5062 5063 /* attach the supporting struct to B_mpi for reuse */ 5064 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5065 PetscCall(PetscContainerSetPointer(container, merge)); 5066 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5067 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5068 PetscCall(PetscContainerDestroy(&container)); 5069 *mpimat = B_mpi; 5070 5071 
PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5072 PetscFunctionReturn(PETSC_SUCCESS); 5073 } 5074 5075 /*@C 5076 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5077 matrices from each processor 5078 5079 Collective 5080 5081 Input Parameters: 5082 + comm - the communicators the parallel matrix will live on 5083 . seqmat - the input sequential matrices 5084 . m - number of local rows (or `PETSC_DECIDE`) 5085 . n - number of local columns (or `PETSC_DECIDE`) 5086 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5087 5088 Output Parameter: 5089 . mpimat - the parallel matrix generated 5090 5091 Level: advanced 5092 5093 Note: 5094 The dimensions of the sequential matrix in each processor MUST be the same. 5095 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5096 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 5097 5098 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5099 @*/ 5100 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5101 { 5102 PetscMPIInt size; 5103 5104 PetscFunctionBegin; 5105 PetscCallMPI(MPI_Comm_size(comm, &size)); 5106 if (size == 1) { 5107 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5108 if (scall == MAT_INITIAL_MATRIX) { 5109 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5110 } else { 5111 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5112 } 5113 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5114 PetscFunctionReturn(PETSC_SUCCESS); 5115 } 5116 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5117 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5118 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5119 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5120 PetscFunctionReturn(PETSC_SUCCESS); 5121 } 
5122 5123 /*@ 5124 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5125 5126 Not Collective 5127 5128 Input Parameter: 5129 . A - the matrix 5130 5131 Output Parameter: 5132 . A_loc - the local sequential matrix generated 5133 5134 Level: developer 5135 5136 Notes: 5137 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5138 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5139 `n` is the global column count obtained with `MatGetSize()` 5140 5141 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5142 5143 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5144 5145 Destroy the matrix with `MatDestroy()` 5146 5147 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5148 @*/ 5149 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5150 { 5151 PetscBool mpi; 5152 5153 PetscFunctionBegin; 5154 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5155 if (mpi) { 5156 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5157 } else { 5158 *A_loc = A; 5159 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5160 } 5161 PetscFunctionReturn(PETSC_SUCCESS); 5162 } 5163 5164 /*@ 5165 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5166 5167 Not Collective 5168 5169 Input Parameters: 5170 + A - the matrix 5171 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5172 5173 Output Parameter: 5174 . A_loc - the local sequential matrix generated 5175 5176 Level: developer 5177 5178 Notes: 5179 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5180 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5181 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* PetscStrbeginswith (rather than an exact type compare) also accepts derived mpiaij types */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block IS the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walking copies of the array pointers; aav/bav stay put for the Restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* merged row i has all of A's diag nonzeros plus all of B's off-diag nonzeros */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* interleave so global column indices come out sorted:
       off-diag cols < cstart, then diag cols (shifted by cstart), then off-diag cols >= cstart */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* pattern is unchanged; rewrite only the values in the same interleaved order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5300 5301 Not Collective 5302 5303 Input Parameters: 5304 + A - the matrix 5305 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5306 5307 Output Parameters: 5308 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5309 - A_loc - the local sequential matrix generated 5310 5311 Level: developer 5312 5313 Note: 5314 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5315 part, then those associated with the off diagonal part (in its local ordering) 5316 5317 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5318 @*/ 5319 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5320 { 5321 Mat Ao, Ad; 5322 const PetscInt *cmap; 5323 PetscMPIInt size; 5324 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5325 5326 PetscFunctionBegin; 5327 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5328 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5329 if (size == 1) { 5330 if (scall == MAT_INITIAL_MATRIX) { 5331 PetscCall(PetscObjectReference((PetscObject)Ad)); 5332 *A_loc = Ad; 5333 } else if (scall == MAT_REUSE_MATRIX) { 5334 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5335 } 5336 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5337 PetscFunctionReturn(PETSC_SUCCESS); 5338 } 5339 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5340 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5341 if (f) { 5342 PetscCall((*f)(A, scall, glob, A_loc)); 5343 } else { 5344 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5345 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5346 Mat_SeqAIJ *c; 5347 
PetscInt *ai = a->i, *aj = a->j; 5348 PetscInt *bi = b->i, *bj = b->j; 5349 PetscInt *ci, *cj; 5350 const PetscScalar *aa, *ba; 5351 PetscScalar *ca; 5352 PetscInt i, j, am, dn, on; 5353 5354 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5355 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5356 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5357 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5358 if (scall == MAT_INITIAL_MATRIX) { 5359 PetscInt k; 5360 PetscCall(PetscMalloc1(1 + am, &ci)); 5361 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5362 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5363 ci[0] = 0; 5364 for (i = 0, k = 0; i < am; i++) { 5365 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5366 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5367 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5368 /* diagonal portion of A */ 5369 for (j = 0; j < ncols_d; j++, k++) { 5370 cj[k] = *aj++; 5371 ca[k] = *aa++; 5372 } 5373 /* off-diagonal portion of A */ 5374 for (j = 0; j < ncols_o; j++, k++) { 5375 cj[k] = dn + *bj++; 5376 ca[k] = *ba++; 5377 } 5378 } 5379 /* put together the new matrix */ 5380 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5381 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5382 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5383 c = (Mat_SeqAIJ *)(*A_loc)->data; 5384 c->free_a = PETSC_TRUE; 5385 c->free_ij = PETSC_TRUE; 5386 c->nonew = 0; 5387 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5388 } else if (scall == MAT_REUSE_MATRIX) { 5389 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5390 for (i = 0; i < am; i++) { 5391 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5392 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5393 /* diagonal portion of A */ 5394 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5395 /* off-diagonal portion of A */ 5396 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5397 } 5398 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5399 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5400 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5401 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5402 if (glob) { 5403 PetscInt cst, *gidx; 5404 5405 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5406 PetscCall(PetscMalloc1(dn + on, &gidx)); 5407 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5408 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5409 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5410 } 5411 } 5412 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5413 PetscFunctionReturn(PETSC_SUCCESS); 5414 } 5415 5416 /*@C 5417 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5418 5419 Not Collective 5420 5421 Input Parameters: 5422 + A - the matrix 5423 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5424 . row - index set of rows to extract (or `NULL`) 5425 - col - index set of columns to extract (or `NULL`) 5426 5427 Output Parameter: 5428 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default: the columns with local nonzeros, in ascending global order:
       off-diag columns below the owned range, then the owned range, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* relies on garray being sorted */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* idx ownership transfers to the IS */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  /* (continues MatCreateSeqSubMatrixWithRows_Private) */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per root row, store (diag nnz, offdiag nnz) pairs and their running offsets */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs: one leaf per nonzero, to ship column indices and values */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; pd->j is mutated in place and
     restored below, after the sf broadcast of the j-structure has completed */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is likewise mapped to global indices in place and mapped back below */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof consecutive columns collapse to one key (MAIJ support) */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm,
               PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    /* remember how A's off-diag columns map onto P_oth's rows for later reuse */
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A     - the first matrix in `MATMPIAIJ` format
. B     - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
. colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows of B to fetch = nonzero columns of local A, in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective

   Input Parameters:
+    A,B - the matrices in `MATMPIAIJ` format
-    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Note:
    This directly accesses information inside the VecScatter associated with the matrix-vector product
    for this matrix. This is not desirable..
5828 5829 Level: developer 5830 5831 */ 5832 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5833 { 5834 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5835 Mat_SeqAIJ *b_oth; 5836 VecScatter ctx; 5837 MPI_Comm comm; 5838 const PetscMPIInt *rprocs, *sprocs; 5839 const PetscInt *srow, *rstarts, *sstarts; 5840 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5841 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5842 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5843 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5844 PetscMPIInt size, tag, rank, nreqs; 5845 5846 PetscFunctionBegin; 5847 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5848 PetscCallMPI(MPI_Comm_size(comm, &size)); 5849 5850 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5851 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5852 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5853 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5854 5855 if (size == 1) { 5856 startsj_s = NULL; 5857 bufa_ptr = NULL; 5858 *B_oth = NULL; 5859 PetscFunctionReturn(PETSC_SUCCESS); 5860 } 5861 5862 ctx = a->Mvctx; 5863 tag = ((PetscObject)ctx)->tag; 5864 5865 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5866 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5867 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5868 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5869 PetscCall(PetscMalloc1(nreqs, &reqs)); 5870 rwaits = reqs; 5871 swaits = reqs + nrecvs; 5872 5873 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5874 if (scall == MAT_INITIAL_MATRIX) { 5875 /* i-array */ 5876 /* post receives */ 5877 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5878 for (i = 0; i < nrecvs; i++) { 5879 rowlen = rvalues + rstarts[i] * rbs; 5880 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5881 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5882 } 5883 5884 /* pack the outgoing message */ 5885 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5886 5887 sstartsj[0] = 0; 5888 rstartsj[0] = 0; 5889 len = 0; /* total length of j or a array to be sent */ 5890 if (nsends) { 5891 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5892 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5893 } 5894 for (i = 0; i < nsends; i++) { 5895 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5896 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5897 for (j = 0; j < nrows; j++) { 5898 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5899 for (l = 0; l < sbs; l++) { 5900 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5901 5902 rowlen[j * sbs + l] = ncols; 5903 5904 len += ncols; 5905 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5906 } 5907 k++; 5908 } 5909 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5910 5911 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5912 } 5913 /* recvs and sends of i-array are completed */ 5914 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5915 PetscCall(PetscFree(svalues)); 
5916 5917 /* allocate buffers for sending j and a arrays */ 5918 PetscCall(PetscMalloc1(len + 1, &bufj)); 5919 PetscCall(PetscMalloc1(len + 1, &bufa)); 5920 5921 /* create i-array of B_oth */ 5922 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5923 5924 b_othi[0] = 0; 5925 len = 0; /* total length of j or a array to be received */ 5926 k = 0; 5927 for (i = 0; i < nrecvs; i++) { 5928 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5929 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5930 for (j = 0; j < nrows; j++) { 5931 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5932 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5933 k++; 5934 } 5935 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5936 } 5937 PetscCall(PetscFree(rvalues)); 5938 5939 /* allocate space for j and a arrays of B_oth */ 5940 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5941 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5942 5943 /* j-array */ 5944 /* post receives of j-array */ 5945 for (i = 0; i < nrecvs; i++) { 5946 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5947 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5948 } 5949 5950 /* pack the outgoing message j-array */ 5951 if (nsends) k = sstarts[0]; 5952 for (i = 0; i < nsends; i++) { 5953 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5954 bufJ = bufj + sstartsj[i]; 5955 for (j = 0; j < nrows; j++) { 5956 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5957 for (ll = 0; ll < sbs; ll++) { 5958 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5959 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5960 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5961 } 5962 } 5963 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5964 } 5965 5966 /* recvs and sends of j-array are 
completed */ 5967 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5968 } else if (scall == MAT_REUSE_MATRIX) { 5969 sstartsj = *startsj_s; 5970 rstartsj = *startsj_r; 5971 bufa = *bufa_ptr; 5972 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5973 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5974 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5975 5976 /* a-array */ 5977 /* post receives of a-array */ 5978 for (i = 0; i < nrecvs; i++) { 5979 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5980 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5981 } 5982 5983 /* pack the outgoing message a-array */ 5984 if (nsends) k = sstarts[0]; 5985 for (i = 0; i < nsends; i++) { 5986 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5987 bufA = bufa + sstartsj[i]; 5988 for (j = 0; j < nrows; j++) { 5989 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5990 for (ll = 0; ll < sbs; ll++) { 5991 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5992 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5993 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5994 } 5995 } 5996 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5997 } 5998 /* recvs and sends of a-array are completed */ 5999 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6000 PetscCall(PetscFree(reqs)); 6001 6002 if (scall == MAT_INITIAL_MATRIX) { 6003 /* put together the new matrix */ 6004 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6005 6006 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6007 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6008 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6009 b_oth->free_a = PETSC_TRUE; 6010 b_oth->free_ij = PETSC_TRUE; 6011 b_oth->nonew = 0; 6012 6013 PetscCall(PetscFree(bufj)); 6014 if (!startsj_s || !bufa_ptr) { 6015 PetscCall(PetscFree2(sstartsj, rstartsj)); 6016 PetscCall(PetscFree(bufa_ptr)); 6017 } else { 6018 *startsj_s = sstartsj; 6019 *startsj_r = rstartsj; 6020 *bufa_ptr = bufa; 6021 } 6022 } else if (scall == MAT_REUSE_MATRIX) { 6023 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6024 } 6025 6026 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6027 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6028 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6029 PetscFunctionReturn(PETSC_SUCCESS); 6030 } 6031 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6035 #if defined(PETSC_HAVE_MKL_SPARSE) 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6037 #endif 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6040 #if defined(PETSC_HAVE_ELEMENTAL) 6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6042 #endif 6043 #if defined(PETSC_HAVE_SCALAPACK) 6044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6045 #endif 6046 #if defined(PETSC_HAVE_HYPRE) 6047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6048 #endif 6049 #if defined(PETSC_HAVE_CUDA) 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6051 #endif 6052 #if defined(PETSC_HAVE_HIP) 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6054 #endif 6055 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6057 #endif 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6059 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6060 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6061 6062 /* 6063 Computes (B'*A')' since computing B*A directly is untenable 6064 6065 n p p 6066 [ ] [ ] [ ] 6067 m [ A ] * n [ B ] = m [ C ] 6068 [ ] [ ] [ ] 6069 6070 */ 6071 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6072 { 6073 Mat At, Bt, Ct; 6074 6075 PetscFunctionBegin; 6076 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6077 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6078 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6079 PetscCall(MatDestroy(&At)); 6080 PetscCall(MatDestroy(&Bt)); 6081 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6082 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6083 PetscCall(MatDestroy(&Ct)); 6084 PetscFunctionReturn(PETSC_SUCCESS); 6085 } 6086 6087 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6088 { 6089 PetscBool cisdense; 6090 6091 PetscFunctionBegin; 6092 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6093 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6094 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6095 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6096 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6097 
PetscCall(MatSetUp(C)); 6098 6099 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6100 PetscFunctionReturn(PETSC_SUCCESS); 6101 } 6102 6103 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6104 { 6105 Mat_Product *product = C->product; 6106 Mat A = product->A, B = product->B; 6107 6108 PetscFunctionBegin; 6109 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6110 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6111 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6112 C->ops->productsymbolic = MatProductSymbolic_AB; 6113 PetscFunctionReturn(PETSC_SUCCESS); 6114 } 6115 6116 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6117 { 6118 Mat_Product *product = C->product; 6119 6120 PetscFunctionBegin; 6121 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6122 PetscFunctionReturn(PETSC_SUCCESS); 6123 } 6124 6125 /* 6126 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6127 6128 Input Parameters: 6129 6130 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6131 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6132 6133 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6134 6135 For Set1, j1[] contains column indices of the nonzeros. 6136 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6137 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6138 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6139 6140 Similar for Set2. 
6141 6142 This routine merges the two sets of nonzeros row by row and removes repeats. 6143 6144 Output Parameters: (memory is allocated by the caller) 6145 6146 i[],j[]: the CSR of the merged matrix, which has m rows. 6147 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6148 imap2[]: similar to imap1[], but for Set2. 6149 Note we order nonzeros row-by-row and from left to right. 6150 */ 6151 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6152 { 6153 PetscInt r, m; /* Row index of mat */ 6154 PetscCount t, t1, t2, b1, e1, b2, e2; 6155 6156 PetscFunctionBegin; 6157 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6158 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6159 i[0] = 0; 6160 for (r = 0; r < m; r++) { /* Do row by row merging */ 6161 b1 = rowBegin1[r]; 6162 e1 = rowEnd1[r]; 6163 b2 = rowBegin2[r]; 6164 e2 = rowEnd2[r]; 6165 while (b1 < e1 && b2 < e2) { 6166 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6167 j[t] = j1[b1]; 6168 imap1[t1] = t; 6169 imap2[t2] = t; 6170 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6171 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6172 t1++; 6173 t2++; 6174 t++; 6175 } else if (j1[b1] < j2[b2]) { 6176 j[t] = j1[b1]; 6177 imap1[t1] = t; 6178 b1 += jmap1[t1 + 1] - jmap1[t1]; 6179 t1++; 6180 t++; 6181 } else { 6182 j[t] = j2[b2]; 6183 imap2[t2] = t; 6184 b2 += jmap2[t2 + 1] - jmap2[t2]; 6185 t2++; 6186 t++; 6187 } 6188 } 6189 /* Merge the remaining in either j1[] or j2[] */ 6190 while (b1 < e1) { 6191 j[t] = j1[b1]; 6192 imap1[t1] = t; 6193 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6194 t1++; 6195 t++; 6196 } 6197 while (b2 < e2) { 6198 j[t] = j2[b2]; 6199 imap2[t2] = t; 6200 b2 += jmap2[t2 + 1] - jmap2[t2]; 6201 t2++; 6202 t++; 6203 } 6204 i[r + 1] = t; 6205 } 6206 PetscFunctionReturn(PETSC_SUCCESS); 6207 } 6208 6209 /* 6210 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6211 6212 Input Parameters: 6213 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6214 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6215 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6216 6217 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6218 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6219 6220 Output Parameters: 6221 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6222 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6223 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6224 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6225 6226 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6227 Atot: number of entries belonging to the diagonal block. 6228 Annz: number of unique nonzeros belonging to the diagonal block. 6229 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6230 repeats (i.e., same 'i,j' pair). 6231 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6232 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6233 6234 Atot: number of entries belonging to the diagonal block 6235 Annz: number of unique nonzeros belonging to the diagonal block. 6236 6237 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6238 6239 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6240 */ 6241 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6242 { 6243 PetscInt cstart, cend, rstart, rend, row, col; 6244 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6245 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6246 PetscCount k, m, p, q, r, s, mid; 6247 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6248 6249 PetscFunctionBegin; 6250 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6251 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6252 m = rend - rstart; 6253 6254 /* Skip negative rows */ 6255 for (k = 0; k < n; k++) 6256 if (i[k] >= 0) break; 6257 6258 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6259 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6260 */ 6261 while (k < n) { 6262 row = i[k]; 6263 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6264 for (s = k; s < n; s++) 6265 if (i[s] != row) break; 6266 6267 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6268 for (p = k; p < s; p++) { 6269 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6270 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6271 } 6272 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6273 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6274 rowBegin[row - rstart] = k; 6275 rowMid[row - rstart] = mid; 6276 rowEnd[row - rstart] = s; 6277 6278 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6279 Atot += mid - k; 6280 Btot += s - mid; 6281 6282 /* Count unique nonzeros of this diag row */ 6283 for (p = k; p < mid;) { 6284 col = j[p]; 6285 do { 6286 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6287 p++; 6288 } while (p < mid && j[p] == col); 6289 Annz++; 6290 } 6291 6292 /* Count unique nonzeros of this offdiag row */ 6293 for (p = mid; p < s;) { 6294 col = j[p]; 6295 do { 6296 p++; 6297 } while (p < s && j[p] == col); 6298 Bnnz++; 6299 } 6300 k = s; 6301 } 6302 6303 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6304 PetscCall(PetscMalloc1(Atot, &Aperm)); 6305 PetscCall(PetscMalloc1(Btot, &Bperm)); 6306 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6307 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6308 6309 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6310 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6311 for (r = 0; r < m; r++) { 6312 k = rowBegin[r]; 6313 mid = rowMid[r]; 6314 s = rowEnd[r]; 6315 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6316 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 
6317 Atot += mid - k; 6318 Btot += s - mid; 6319 6320 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6321 for (p = k; p < mid;) { 6322 col = j[p]; 6323 q = p; 6324 do { 6325 p++; 6326 } while (p < mid && j[p] == col); 6327 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6328 Annz++; 6329 } 6330 6331 for (p = mid; p < s;) { 6332 col = j[p]; 6333 q = p; 6334 do { 6335 p++; 6336 } while (p < s && j[p] == col); 6337 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6338 Bnnz++; 6339 } 6340 } 6341 /* Output */ 6342 *Aperm_ = Aperm; 6343 *Annz_ = Annz; 6344 *Atot_ = Atot; 6345 *Ajmap_ = Ajmap; 6346 *Bperm_ = Bperm; 6347 *Bnnz_ = Bnnz; 6348 *Btot_ = Btot; 6349 *Bjmap_ = Bjmap; 6350 PetscFunctionReturn(PETSC_SUCCESS); 6351 } 6352 6353 /* 6354 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6355 6356 Input Parameters: 6357 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6358 nnz: number of unique nonzeros in the merged matrix 6359 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6360 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6361 6362 Output Parameter: (memory is allocated by the caller) 6363 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6364 6365 Example: 6366 nnz1 = 4 6367 nnz = 6 6368 imap = [1,3,4,5] 6369 jmap = [0,3,5,6,7] 6370 then, 6371 jmap_new = [0,0,3,3,5,6,7] 6372 */ 6373 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6374 { 6375 PetscCount k, p; 6376 6377 PetscFunctionBegin; 6378 jmap_new[0] = 0; 6379 p = nnz; /* p loops over jmap_new[] backwards */ 6380 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6381 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6382 } 6383 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6384 
PetscFunctionReturn(PETSC_SUCCESS); 6385 } 6386 6387 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6388 { 6389 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6390 6391 PetscFunctionBegin; 6392 PetscCall(PetscSFDestroy(&coo->sf)); 6393 PetscCall(PetscFree(coo->Aperm1)); 6394 PetscCall(PetscFree(coo->Bperm1)); 6395 PetscCall(PetscFree(coo->Ajmap1)); 6396 PetscCall(PetscFree(coo->Bjmap1)); 6397 PetscCall(PetscFree(coo->Aimap2)); 6398 PetscCall(PetscFree(coo->Bimap2)); 6399 PetscCall(PetscFree(coo->Aperm2)); 6400 PetscCall(PetscFree(coo->Bperm2)); 6401 PetscCall(PetscFree(coo->Ajmap2)); 6402 PetscCall(PetscFree(coo->Bjmap2)); 6403 PetscCall(PetscFree(coo->Cperm1)); 6404 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6405 PetscCall(PetscFree(coo)); 6406 PetscFunctionReturn(PETSC_SUCCESS); 6407 } 6408 6409 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6410 { 6411 MPI_Comm comm; 6412 PetscMPIInt rank, size; 6413 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6414 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6415 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6416 PetscContainer container; 6417 MatCOOStruct_MPIAIJ *coo; 6418 6419 PetscFunctionBegin; 6420 PetscCall(PetscFree(mpiaij->garray)); 6421 PetscCall(VecDestroy(&mpiaij->lvec)); 6422 #if defined(PETSC_USE_CTABLE) 6423 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6424 #else 6425 PetscCall(PetscFree(mpiaij->colmap)); 6426 #endif 6427 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6428 mat->assembled = PETSC_FALSE; 6429 mat->was_assembled = PETSC_FALSE; 6430 6431 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6432 PetscCallMPI(MPI_Comm_size(comm, &size)); 6433 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6434 PetscCall(PetscLayoutSetUp(mat->rmap)); 6435 PetscCall(PetscLayoutSetUp(mat->cmap)); 6436 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, 
&rend)); 6437 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6438 PetscCall(MatGetLocalSize(mat, &m, &n)); 6439 PetscCall(MatGetSize(mat, &M, &N)); 6440 6441 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6442 /* entries come first, then local rows, then remote rows. */ 6443 PetscCount n1 = coo_n, *perm1; 6444 PetscInt *i1 = coo_i, *j1 = coo_j; 6445 6446 PetscCall(PetscMalloc1(n1, &perm1)); 6447 for (k = 0; k < n1; k++) perm1[k] = k; 6448 6449 /* Manipulate indices so that entries with negative row or col indices will have smallest 6450 row indices, local entries will have greater but negative row indices, and remote entries 6451 will have positive row indices. 6452 */ 6453 for (k = 0; k < n1; k++) { 6454 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6455 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6456 else { 6457 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6458 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6459 } 6460 } 6461 6462 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6463 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6464 6465 /* Advance k to the first entry we need to take care of */ 6466 for (k = 0; k < n1; k++) 6467 if (i1[k] > PETSC_MIN_INT) break; 6468 PetscInt i1start = k; 6469 6470 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6471 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6472 6473 /* Send remote rows to their owner */ 6474 /* Find which rows should be sent to which remote ranks*/ 6475 PetscInt 
nsend = 0; /* Number of MPI ranks to send data to */ 6476 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6477 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6478 const PetscInt *ranges; 6479 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6480 6481 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6482 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6483 for (k = rem; k < n1;) { 6484 PetscMPIInt owner; 6485 PetscInt firstRow, lastRow; 6486 6487 /* Locate a row range */ 6488 firstRow = i1[k]; /* first row of this owner */ 6489 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6490 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6491 6492 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6493 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6494 6495 /* All entries in [k,p) belong to this remote owner */ 6496 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6497 PetscMPIInt *sendto2; 6498 PetscInt *nentries2; 6499 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6500 6501 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6502 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6503 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6504 PetscCall(PetscFree2(sendto, nentries2)); 6505 sendto = sendto2; 6506 nentries = nentries2; 6507 maxNsend = maxNsend2; 6508 } 6509 sendto[nsend] = owner; 6510 nentries[nsend] = p - k; 6511 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6512 nsend++; 6513 k = p; 6514 } 6515 6516 /* Build 1st SF to know offsets on remote to send data */ 6517 PetscSF sf1; 6518 PetscInt nroots = 1, nroots2 = 0; 6519 PetscInt nleaves = nsend, nleaves2 = 0; 6520 PetscInt *offsets; 6521 PetscSFNode *iremote; 6522 6523 PetscCall(PetscSFCreate(comm, &sf1)); 6524 PetscCall(PetscMalloc1(nsend, &iremote)); 6525 PetscCall(PetscMalloc1(nsend, &offsets)); 6526 for (k = 0; k < nsend; k++) { 6527 iremote[k].rank = sendto[k]; 6528 iremote[k].index = 0; 6529 nleaves2 += nentries[k]; 6530 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6531 } 6532 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6533 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6534 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6535 PetscCall(PetscSFDestroy(&sf1)); 6536 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6537 6538 /* Build 2nd SF to send remote COOs to their owner */ 6539 PetscSF sf2; 6540 nroots = nroots2; 6541 nleaves = nleaves2; 6542 PetscCall(PetscSFCreate(comm, &sf2)); 6543 
PetscCall(PetscSFSetFromOptions(sf2)); 6544 PetscCall(PetscMalloc1(nleaves, &iremote)); 6545 p = 0; 6546 for (k = 0; k < nsend; k++) { 6547 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6548 for (q = 0; q < nentries[k]; q++, p++) { 6549 iremote[p].rank = sendto[k]; 6550 iremote[p].index = offsets[k] + q; 6551 } 6552 } 6553 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6554 6555 /* Send the remote COOs to their owner */ 6556 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6557 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6558 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6559 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6562 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6563 6564 PetscCall(PetscFree(offsets)); 6565 PetscCall(PetscFree2(sendto, nentries)); 6566 6567 /* Sort received COOs by row along with the permutation array */ 6568 for (k = 0; k < n2; k++) perm2[k] = k; 6569 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6570 6571 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6572 PetscCount *Cperm1; 6573 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6574 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6575 6576 /* Support for HYPRE matrices, kind of a hack. 
6577 Swap min column with diagonal so that diagonal values will go first */ 6578 PetscBool hypre; 6579 const char *name; 6580 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6581 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6582 if (hypre) { 6583 PetscInt *minj; 6584 PetscBT hasdiag; 6585 6586 PetscCall(PetscBTCreate(m, &hasdiag)); 6587 PetscCall(PetscMalloc1(m, &minj)); 6588 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6589 for (k = i1start; k < rem; k++) { 6590 if (j1[k] < cstart || j1[k] >= cend) continue; 6591 const PetscInt rindex = i1[k] - rstart; 6592 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6593 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6594 } 6595 for (k = 0; k < n2; k++) { 6596 if (j2[k] < cstart || j2[k] >= cend) continue; 6597 const PetscInt rindex = i2[k] - rstart; 6598 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6599 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6600 } 6601 for (k = i1start; k < rem; k++) { 6602 const PetscInt rindex = i1[k] - rstart; 6603 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6604 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6605 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6606 } 6607 for (k = 0; k < n2; k++) { 6608 const PetscInt rindex = i2[k] - rstart; 6609 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6610 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6611 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6612 } 6613 PetscCall(PetscBTDestroy(&hasdiag)); 6614 PetscCall(PetscFree(minj)); 6615 } 6616 6617 /* Split local COOs and received COOs into diag/offdiag portions */ 6618 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6619 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6620 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6621 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6622 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6623 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6624 6625 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6626 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6627 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6628 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6629 6630 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6631 PetscInt *Ai, *Bi; 6632 PetscInt *Aj, *Bj; 6633 6634 PetscCall(PetscMalloc1(m + 1, &Ai)); 6635 PetscCall(PetscMalloc1(m + 1, &Bi)); 6636 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6637 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6638 6639 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6640 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6641 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6642 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6643 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6644 6645 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6646 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6647 6648 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6649 /* expect nonzeros in A/B most likely have local contributing entries */ 6650 PetscInt Annz = Ai[m]; 6651 PetscInt Bnnz = Bi[m]; 6652 PetscCount *Ajmap1_new, *Bjmap1_new; 6653 6654 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6655 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6656 6657 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6658 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6659 
6660 PetscCall(PetscFree(Aimap1)); 6661 PetscCall(PetscFree(Ajmap1)); 6662 PetscCall(PetscFree(Bimap1)); 6663 PetscCall(PetscFree(Bjmap1)); 6664 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6665 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6666 PetscCall(PetscFree(perm1)); 6667 PetscCall(PetscFree3(i2, j2, perm2)); 6668 6669 Ajmap1 = Ajmap1_new; 6670 Bjmap1 = Bjmap1_new; 6671 6672 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6673 if (Annz < Annz1 + Annz2) { 6674 PetscInt *Aj_new; 6675 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6676 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6677 PetscCall(PetscFree(Aj)); 6678 Aj = Aj_new; 6679 } 6680 6681 if (Bnnz < Bnnz1 + Bnnz2) { 6682 PetscInt *Bj_new; 6683 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6684 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6685 PetscCall(PetscFree(Bj)); 6686 Bj = Bj_new; 6687 } 6688 6689 /* Create new submatrices for on-process and off-process coupling */ 6690 PetscScalar *Aa, *Ba; 6691 MatType rtype; 6692 Mat_SeqAIJ *a, *b; 6693 PetscObjectState state; 6694 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6695 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6696 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6697 if (cstart) { 6698 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6699 } 6700 PetscCall(MatDestroy(&mpiaij->A)); 6701 PetscCall(MatDestroy(&mpiaij->B)); 6702 PetscCall(MatGetRootType_Private(mat, &rtype)); 6703 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6704 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6705 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6706 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6707 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6708 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, 
MPI_SUM, PetscObjectComm((PetscObject)mat))); 6709 6710 a = (Mat_SeqAIJ *)mpiaij->A->data; 6711 b = (Mat_SeqAIJ *)mpiaij->B->data; 6712 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6713 a->free_a = b->free_a = PETSC_TRUE; 6714 a->free_ij = b->free_ij = PETSC_TRUE; 6715 6716 /* conversion must happen AFTER multiply setup */ 6717 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6718 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6719 PetscCall(VecDestroy(&mpiaij->lvec)); 6720 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6721 6722 // Put the COO struct in a container and then attach that to the matrix 6723 PetscCall(PetscMalloc1(1, &coo)); 6724 coo->n = coo_n; 6725 coo->sf = sf2; 6726 coo->sendlen = nleaves; 6727 coo->recvlen = nroots; 6728 coo->Annz = Annz; 6729 coo->Bnnz = Bnnz; 6730 coo->Annz2 = Annz2; 6731 coo->Bnnz2 = Bnnz2; 6732 coo->Atot1 = Atot1; 6733 coo->Atot2 = Atot2; 6734 coo->Btot1 = Btot1; 6735 coo->Btot2 = Btot2; 6736 coo->Ajmap1 = Ajmap1; 6737 coo->Aperm1 = Aperm1; 6738 coo->Bjmap1 = Bjmap1; 6739 coo->Bperm1 = Bperm1; 6740 coo->Aimap2 = Aimap2; 6741 coo->Ajmap2 = Ajmap2; 6742 coo->Aperm2 = Aperm2; 6743 coo->Bimap2 = Bimap2; 6744 coo->Bjmap2 = Bjmap2; 6745 coo->Bperm2 = Bperm2; 6746 coo->Cperm1 = Cperm1; 6747 // Allocate in preallocation. 
// If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add the values v[] into the matrix following the COO pattern computed by
   MatSetPreallocationCOO_MPIAIJ() and cached on the matrix in the "__PETSc_MatCOOStruct_Host"
   container. v[] is indexed in the original user ordering of the COO (i,j) pairs; the cached
   permutation/jmap arrays map those entries onto the nonzeros of the diagonal (A) and
   off-diagonal (B) blocks, with remote contributions routed through coo->sf. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Retrieve the COO assembly plan built during preallocation */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; /* INSERT overwrites, ADD accumulates into the existing value */
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; remote contributions are always additive since
     the owning rank's local pass above already established the base value of each nonzero */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ context, initializes its fields,
   and registers the type-specific operations/conversions dispatched via PetscObjectQueryFunction() */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register implementations looked up by string; keep names in sync with MatDestroy_MPIAIJ */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
.
n - This value should be the same as the local size used in creating the 6932 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6933 calculated if `N` is given) For square matrices `n` is almost always `m`. 6934 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6935 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6936 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6937 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6938 . a - matrix values 6939 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6940 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6941 - oa - matrix values 6942 6943 Output Parameter: 6944 . mat - the matrix 6945 6946 Level: advanced 6947 6948 Notes: 6949 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6950 must free the arrays once the matrix has been destroyed and not before. 6951 6952 The `i` and `j` indices are 0 based 6953 6954 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6955 6956 This sets local rows and cannot be used to set off-processor values. 6957 6958 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6959 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6960 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6961 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6962 keep track of the underlying array. 
Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Validate: local row count must be explicit, and both CSR row-offset arrays must start at 0 */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user's split CSR arrays as the diagonal (A) and off-diagonal (B) blocks; the arrays
     are NOT copied — the caller keeps ownership and must not free them before the matrix */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local rows were provided, so assembly can skip all off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Work context for the backend (device-capable) MatMat product implementation, which assembles
   the final product C from a series of local intermediate products via COO insertion */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destroy callback for a MatMatMPIAIJBACKEND context: releases all intermediate products,
   communication buffers, and index arrays owned by the context */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with memtype mtype; they must be freed the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single backing allocation behind every per-product index array */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific implementation (e.g. a device-aware one) when the matrix registered one */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++]; /* gather v[j] = vv[idx[j]] */
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatMat product: recompute the intermediate local products,
   gather their values into the COO buffers (scattering off-process contributions through sf),
   then insert everything into C with MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* reusesym only skips the update on the first numeric call right after symbolic */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Copy values of each non-temporary product into the on-process (coo_v) and off-process (coo_w) buffers;
     n_d and n_o track the running offsets into those buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); /* all values of this product stay on-process */
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7141 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7142 /* a base offset; type-2: sparse with a local to global map table */ 7143 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7144 7145 MatProductType ptype; 7146 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7147 PetscMPIInt size; 7148 7149 PetscFunctionBegin; 7150 MatCheckProduct(C, 1); 7151 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7152 ptype = product->type; 7153 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7154 ptype = MATPRODUCT_AB; 7155 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7156 } 7157 switch (ptype) { 7158 case MATPRODUCT_AB: 7159 A = product->A; 7160 P = product->B; 7161 m = A->rmap->n; 7162 n = P->cmap->n; 7163 M = A->rmap->N; 7164 N = P->cmap->N; 7165 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7166 break; 7167 case MATPRODUCT_AtB: 7168 P = product->A; 7169 A = product->B; 7170 m = P->cmap->n; 7171 n = A->cmap->n; 7172 M = P->cmap->N; 7173 N = A->cmap->N; 7174 hasoffproc = PETSC_TRUE; 7175 break; 7176 case MATPRODUCT_PtAP: 7177 A = product->A; 7178 P = product->B; 7179 m = P->cmap->n; 7180 n = P->cmap->n; 7181 M = P->cmap->N; 7182 N = P->cmap->N; 7183 hasoffproc = PETSC_TRUE; 7184 break; 7185 default: 7186 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7187 } 7188 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7189 if (size == 1) hasoffproc = PETSC_FALSE; 7190 7191 /* defaults */ 7192 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7193 mp[i] = NULL; 7194 mptmp[i] = PETSC_FALSE; 7195 rmapt[i] = -1; 7196 cmapt[i] = -1; 7197 rmapa[i] = NULL; 7198 cmapa[i] = NULL; 7199 } 7200 7201 /* customization */ 
7202 PetscCall(PetscNew(&mmdata)); 7203 mmdata->reusesym = product->api_user; 7204 if (ptype == MATPRODUCT_AB) { 7205 if (product->api_user) { 7206 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7207 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7208 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7209 PetscOptionsEnd(); 7210 } else { 7211 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7212 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7213 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7214 PetscOptionsEnd(); 7215 } 7216 } else if (ptype == MATPRODUCT_PtAP) { 7217 if (product->api_user) { 7218 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7219 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7220 PetscOptionsEnd(); 7221 } else { 7222 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7223 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7224 PetscOptionsEnd(); 7225 } 7226 } 7227 a = (Mat_MPIAIJ *)A->data; 7228 p = (Mat_MPIAIJ *)P->data; 7229 PetscCall(MatSetSizes(C, m, n, M, N)); 7230 PetscCall(PetscLayoutSetUp(C->rmap)); 7231 PetscCall(PetscLayoutSetUp(C->cmap)); 7232 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7233 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7234 7235 cp = 0; 7236 switch (ptype) { 7237 case MATPRODUCT_AB: /* A * P */ 7238 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7239 7240 /* A_diag * P_local (merged or not) */ 7241 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7242 /* P is product->B */ 7243 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7244 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7245 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7246 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7247 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7248 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7249 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7250 mp[cp]->product->api_user = product->api_user; 7251 PetscCall(MatProductSetFromOptions(mp[cp])); 7252 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7253 PetscCall(ISGetIndices(glob, &globidx)); 7254 rmapt[cp] = 1; 7255 cmapt[cp] = 2; 7256 cmapa[cp] = globidx; 7257 mptmp[cp] = PETSC_FALSE; 7258 cp++; 7259 } else { /* A_diag * P_diag and A_diag * P_off */ 7260 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7261 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7262 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7263 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7264 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7265 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7266 mp[cp]->product->api_user = product->api_user; 7267 PetscCall(MatProductSetFromOptions(mp[cp])); 7268 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7269 rmapt[cp] = 1; 7270 cmapt[cp] = 1; 7271 mptmp[cp] = PETSC_FALSE; 7272 cp++; 7273 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7274 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7275 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7276 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7277 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7278 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7279 mp[cp]->product->api_user = product->api_user; 7280 PetscCall(MatProductSetFromOptions(mp[cp])); 7281 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7282 rmapt[cp] = 1; 7283 cmapt[cp] = 2; 7284 cmapa[cp] = p->garray; 7285 mptmp[cp] = PETSC_FALSE; 7286 cp++; 7287 } 7288 7289 /* A_off * P_other */ 7290 if (mmdata->P_oth) { 7291 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7292 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7293 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7294 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7295 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7296 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7297 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7298 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7299 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7300 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7301 mp[cp]->product->api_user = product->api_user; 7302 PetscCall(MatProductSetFromOptions(mp[cp])); 7303 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7304 rmapt[cp] = 1; 7305 cmapt[cp] = 2; 7306 cmapa[cp] = P_oth_idx; 7307 mptmp[cp] = PETSC_FALSE; 7308 cp++; 7309 } 7310 break; 7311 7312 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7313 /* A is product->B */ 7314 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7315 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7316 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7317 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7318 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7319 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7320 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7321 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7322 mp[cp]->product->api_user = product->api_user; 7323 PetscCall(MatProductSetFromOptions(mp[cp])); 7324 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7325 PetscCall(ISGetIndices(glob, &globidx)); 7326 rmapt[cp] = 2; 7327 rmapa[cp] = globidx; 7328 cmapt[cp] = 2; 7329 cmapa[cp] = globidx; 7330 mptmp[cp] = PETSC_FALSE; 7331 cp++; 7332 } else { 7333 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7334 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7335 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7336 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7337 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7338 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7339 mp[cp]->product->api_user = product->api_user; 7340 PetscCall(MatProductSetFromOptions(mp[cp])); 7341 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7342 PetscCall(ISGetIndices(glob, &globidx)); 7343 rmapt[cp] = 1; 7344 cmapt[cp] = 2; 7345 cmapa[cp] = globidx; 7346 mptmp[cp] = PETSC_FALSE; 7347 cp++; 7348 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7349 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7350 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7351 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7352 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7353 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7354 mp[cp]->product->api_user = product->api_user; 7355 PetscCall(MatProductSetFromOptions(mp[cp])); 7356 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7357 rmapt[cp] = 2; 7358 rmapa[cp] = p->garray; 
7359 cmapt[cp] = 2; 7360 cmapa[cp] = globidx; 7361 mptmp[cp] = PETSC_FALSE; 7362 cp++; 7363 } 7364 break; 7365 case MATPRODUCT_PtAP: 7366 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7367 /* P is product->B */ 7368 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7369 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7370 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7371 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7372 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7373 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7374 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7375 mp[cp]->product->api_user = product->api_user; 7376 PetscCall(MatProductSetFromOptions(mp[cp])); 7377 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7378 PetscCall(ISGetIndices(glob, &globidx)); 7379 rmapt[cp] = 2; 7380 rmapa[cp] = globidx; 7381 cmapt[cp] = 2; 7382 cmapa[cp] = globidx; 7383 mptmp[cp] = PETSC_FALSE; 7384 cp++; 7385 if (mmdata->P_oth) { 7386 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7387 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7388 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7389 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7390 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7391 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7392 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7393 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7394 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7395 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7396 mp[cp]->product->api_user = product->api_user; 7397 PetscCall(MatProductSetFromOptions(mp[cp])); 7398 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7399 
mptmp[cp] = PETSC_TRUE; 7400 cp++; 7401 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7402 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7403 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7404 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7405 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7406 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7407 mp[cp]->product->api_user = product->api_user; 7408 PetscCall(MatProductSetFromOptions(mp[cp])); 7409 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7410 rmapt[cp] = 2; 7411 rmapa[cp] = globidx; 7412 cmapt[cp] = 2; 7413 cmapa[cp] = P_oth_idx; 7414 mptmp[cp] = PETSC_FALSE; 7415 cp++; 7416 } 7417 break; 7418 default: 7419 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7420 } 7421 /* sanity check */ 7422 if (size > 1) 7423 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7424 7425 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7426 for (i = 0; i < cp; i++) { 7427 mmdata->mp[i] = mp[i]; 7428 mmdata->mptmp[i] = mptmp[i]; 7429 } 7430 mmdata->cp = cp; 7431 C->product->data = mmdata; 7432 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7433 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7434 7435 /* memory type */ 7436 mmdata->mtype = PETSC_MEMTYPE_HOST; 7437 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7438 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7439 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7440 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7441 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7442 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7443 
7444 /* prepare coo coordinates for values insertion */ 7445 7446 /* count total nonzeros of those intermediate seqaij Mats 7447 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7448 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7449 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7450 */ 7451 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7452 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7453 if (mptmp[cp]) continue; 7454 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7455 const PetscInt *rmap = rmapa[cp]; 7456 const PetscInt mr = mp[cp]->rmap->n; 7457 const PetscInt rs = C->rmap->rstart; 7458 const PetscInt re = C->rmap->rend; 7459 const PetscInt *ii = mm->i; 7460 for (i = 0; i < mr; i++) { 7461 const PetscInt gr = rmap[i]; 7462 const PetscInt nz = ii[i + 1] - ii[i]; 7463 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7464 else ncoo_oown += nz; /* this row is local */ 7465 } 7466 } else ncoo_d += mm->nz; 7467 } 7468 7469 /* 7470 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7471 7472 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7473 7474 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7475 7476 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7477 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7478 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7479 7480 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7481 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7482 */ 7483 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7484 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7485 7486 /* gather (i,j) of nonzeros inserted by remote procs */ 7487 if (hasoffproc) { 7488 PetscSF msf; 7489 PetscInt ncoo2, *coo_i2, *coo_j2; 7490 7491 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7492 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7493 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7494 7495 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7496 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7497 PetscInt *idxoff = mmdata->off[cp]; 7498 PetscInt *idxown = mmdata->own[cp]; 7499 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7500 const PetscInt *rmap = rmapa[cp]; 7501 const PetscInt *cmap = cmapa[cp]; 7502 const PetscInt *ii = mm->i; 7503 PetscInt *coi = coo_i + ncoo_o; 7504 PetscInt *coj = coo_j + ncoo_o; 7505 const PetscInt mr = mp[cp]->rmap->n; 7506 const PetscInt rs = C->rmap->rstart; 7507 const PetscInt re = C->rmap->rend; 7508 const PetscInt cs = C->cmap->rstart; 7509 for (i = 0; i < mr; i++) { 7510 const PetscInt *jj = mm->j + ii[i]; 7511 const PetscInt gr = rmap[i]; 7512 const PetscInt nz = ii[i + 1] - ii[i]; 7513 if (gr < rs || gr >= re) { /* this is an offproc row */ 7514 for (j = ii[i]; j < ii[i + 1]; j++) { 7515 *coi++ = gr; 7516 *idxoff++ = j; 7517 } 7518 if (!cmapt[cp]) { /* already global */ 7519 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7520 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7521 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7522 } else { /* offdiag */ 7523 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7524 } 7525 ncoo_o += nz; 7526 } else { /* this is a local row */ 7527 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7528 } 7529 } 7530 } 7531 mmdata->off[cp + 1] = idxoff; 7532 mmdata->own[cp + 1] = idxown; 7533 } 7534 7535 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7536 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7537 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7538 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7539 ncoo = ncoo_d + ncoo_oown + ncoo2; 7540 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7541 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7542 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7543 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7544 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7545 PetscCall(PetscFree2(coo_i, coo_j)); 7546 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7547 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7548 coo_i = coo_i2; 7549 coo_j = coo_j2; 7550 } else { /* no offproc values insertion */ 7551 ncoo = ncoo_d; 7552 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7553 7554 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7555 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7556 PetscCall(PetscSFSetUp(mmdata->sf)); 7557 } 7558 mmdata->hasoffproc = hasoffproc; 7559 7560 /* gather (i,j) of nonzeros inserted locally */ 7561 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7562 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7563 PetscInt *coi = coo_i + ncoo_d; 7564 PetscInt *coj = coo_j + ncoo_d; 7565 const PetscInt *jj = mm->j; 7566 const PetscInt *ii = mm->i; 7567 const PetscInt *cmap = 
cmapa[cp]; 7568 const PetscInt *rmap = rmapa[cp]; 7569 const PetscInt mr = mp[cp]->rmap->n; 7570 const PetscInt rs = C->rmap->rstart; 7571 const PetscInt re = C->rmap->rend; 7572 const PetscInt cs = C->cmap->rstart; 7573 7574 if (mptmp[cp]) continue; 7575 if (rmapt[cp] == 1) { /* consecutive rows */ 7576 /* fill coo_i */ 7577 for (i = 0; i < mr; i++) { 7578 const PetscInt gr = i + rs; 7579 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7580 } 7581 /* fill coo_j */ 7582 if (!cmapt[cp]) { /* type-0, already global */ 7583 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7584 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7585 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7586 } else { /* type-2, local to global for sparse columns */ 7587 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7588 } 7589 ncoo_d += mm->nz; 7590 } else if (rmapt[cp] == 2) { /* sparse rows */ 7591 for (i = 0; i < mr; i++) { 7592 const PetscInt *jj = mm->j + ii[i]; 7593 const PetscInt gr = rmap[i]; 7594 const PetscInt nz = ii[i + 1] - ii[i]; 7595 if (gr >= rs && gr < re) { /* local rows */ 7596 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7597 if (!cmapt[cp]) { /* type-0, already global */ 7598 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7599 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7600 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7601 } else { /* type-2, local to global for sparse columns */ 7602 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7603 } 7604 ncoo_d += nz; 7605 } 7606 } 7607 } 7608 } 7609 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7610 PetscCall(ISDestroy(&glob)); 7611 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7612 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7613 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7614 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7615 7616 /* preallocate with COO data */ 7617 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7618 PetscCall(PetscFree2(coo_i, coo_j)); 7619 PetscFunctionReturn(PETSC_SUCCESS); 7620 } 7621 7622 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7623 { 7624 Mat_Product *product = mat->product; 7625 #if defined(PETSC_HAVE_DEVICE) 7626 PetscBool match = PETSC_FALSE; 7627 PetscBool usecpu = PETSC_FALSE; 7628 #else 7629 PetscBool match = PETSC_TRUE; 7630 #endif 7631 7632 PetscFunctionBegin; 7633 MatCheckProduct(mat, 1); 7634 #if defined(PETSC_HAVE_DEVICE) 7635 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7636 if (match) { /* we can always fallback to the CPU if requested */ 7637 switch (product->type) { 7638 case MATPRODUCT_AB: 7639 if (product->api_user) { 7640 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7641 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7642 PetscOptionsEnd(); 7643 } else { 7644 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7645 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7646 PetscOptionsEnd(); 7647 } 7648 break; 7649 case MATPRODUCT_AtB: 7650 if (product->api_user) { 7651 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7652 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7653 PetscOptionsEnd(); 7654 } else { 7655 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7656 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7657 PetscOptionsEnd(); 7658 } 7659 break; 7660 case MATPRODUCT_PtAP: 7661 if (product->api_user) { 7662 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7663 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7664 PetscOptionsEnd(); 7665 } else { 7666 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7667 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7668 PetscOptionsEnd(); 7669 } 7670 break; 7671 default: 7672 break; 7673 } 7674 match = (PetscBool)!usecpu; 7675 } 7676 #endif 7677 if (match) { 7678 switch (product->type) { 7679 case MATPRODUCT_AB: 7680 case MATPRODUCT_AtB: 7681 case MATPRODUCT_PtAP: 7682 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7683 break; 7684 default: 7685 break; 7686 } 7687 } 7688 /* fallback to MPIAIJ ops */ 7689 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7690 PetscFunctionReturn(PETSC_SUCCESS); 7691 } 7692 7693 /* 7694 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7695 7696 n - the number of block indices in cc[] 7697 cc - the block indices (must be large enough to contain the indices) 7698 */ 7699 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7700 { 7701 PetscInt cnt = -1, nidx, j; 7702 const PetscInt *idx; 7703 7704 PetscFunctionBegin; 7705 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7706 if (nidx) { 7707 cnt = 0; 7708 cc[cnt] = idx[0] / bs; 7709 for (j = 1; j < nidx; j++) { 7710 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7711 } 7712 } 7713 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7714 *n = cnt + 1; 7715 PetscFunctionReturn(PETSC_SUCCESS); 7716 } 7717 7718 /* 7719 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7720 7721 ncollapsed - the number of block indices 7722 collapsed - the block indices (must be large enough to contain the indices) 7723 */ 7724 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7725 { 7726 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7727 7728 PetscFunctionBegin; 7729 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7730 for (i = start + 1; i < start + bs; i++) { 7731 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7732 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7733 cprevtmp = cprev; 7734 cprev = merged; 7735 merged = cprevtmp; 7736 } 7737 *ncollapsed = nprev; 7738 if (collapsed) *collapsed = cprev; 7739 PetscFunctionReturn(PETSC_SUCCESS); 7740 } 7741 7742 /* 7743 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7744 7745 Input Parameter: 7746 . Amat - matrix 7747 - symmetrize - make the result symmetric 7748 + scale - scale with diagonal 7749 7750 Output Parameter: 7751 . 
/*
  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

  Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
- scale - scale with diagonal

  Output Parameter:
. a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block (graph) rows */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* blocked case: build an nloc x nloc scalar graph where entry (I,J) is the sum of
       |Re(a_ij)| over the bs x bs block (I,J) of Amat */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* fast path: assumes blocks are fully dense; verified below, falls back to old_bs otherwise */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        /* MPI: a = diagonal block, b = off-diagonal block */
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* pass 1: count block nonzeros per block row and verify every block is dense
         (same column set on all bs rows, column ids contiguous within each block) */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;
        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0;
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0;
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); /* scratch: one block row of graph values/columns */
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;                      /* raw CSR of the diagonal block */
        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) { // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray; /* local->global column map of off-diag block */
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first sweep: set the global block column ids and zero the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second sweep: accumulate |Re(.)| over all bs rows of the block row */
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* slow path (also the goto target when sparse blocks are detected above):
         works entry by entry through the public MatGetRow interface */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
          Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          /* cap at the number of off-process graph columns */
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* ADD_VALUES accumulates the |Re(.)| of all scalar entries of a block into one graph entry */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: graph is (a copy of, or a reference to) Amat itself */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      /* no modification needed: share Amat (reference counted) */
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a = d->A;
      b = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      /* G <- G + G^T makes the sparsity pattern (and values) symmetric */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    /* drop entries below the filter threshold */
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7977 PetscCall(MatDestroy(&matTrans)); 7978 } 7979 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7980 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7981 if (scale) { 7982 /* scale c for all diagonal values = 1 or -1 */ 7983 Vec diag; 7984 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7985 PetscCall(MatGetDiagonal(Gmat, diag)); 7986 PetscCall(VecReciprocal(diag)); 7987 PetscCall(VecSqrtAbs(diag)); 7988 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7989 PetscCall(VecDestroy(&diag)); 7990 } 7991 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7992 7993 if (filter >= 0) { 7994 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 7995 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 7996 } 7997 *a_Gmat = Gmat; 7998 PetscFunctionReturn(PETSC_SUCCESS); 7999 } 8000 8001 /* 8002 Special version for direct calls from Fortran 8003 */ 8004 #include <petsc/private/fortranimpl.h> 8005 8006 /* Change these macros so can be used in void function */ 8007 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8008 #undef PetscCall 8009 #define PetscCall(...) \ 8010 do { \ 8011 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8012 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8013 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8014 return; \ 8015 } \ 8016 } while (0) 8017 8018 #undef SETERRQ 8019 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the name the Fortran compiler expects (all-caps or no trailing underscore) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran direct-call version of MatSetValues() for MPIAIJ matrices.

  All arguments arrive as pointers (Fortran pass-by-reference); the error code is returned
  through *_ierr rather than as the function result, which is why PetscCall/SETERRQ were
  redefined above to assign *_ierr and `return;` from this void function.

  Parameters:
+ mmat  - the MPIAIJ matrix
. mm/im - number of rows and their global indices
. mn/in - number of columns and their global indices
. v     - the values, row-oriented or column-oriented depending on aij->roworiented
- maddv - INSERT_VALUES or ADD_VALUES

  Rows owned by this process are inserted directly into the local diagonal (A) or
  off-diagonal (B) SeqAIJ blocks via the MatSetValues_SeqAIJ_{A,B}_Private() macros;
  rows owned elsewhere are stashed for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; afterwards INSERT and ADD may not be mixed before assembly */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* The MatSetValues_SeqAIJ_{A,B}_Private() macros reference these exact local names
       (rp1/ap1/rmax1/... for the A block, rp2/ap2/rmax2/... for the B block); do not rename. */
    Mat         A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored (MatSetValues convention) */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: set up binary-search state for this row in both A and B blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major (C style) or column-major (Fortran style) per aij->roworiented */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* Optionally drop zero off-diagonal contributions when adding (keeps the diagonal entry) */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column in the locally owned range: goes into the diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Off-process column: goes into the off-diagonal block B */
            if (mat->was_assembled) {
              /* colmap translates a global column index to a compressed local B-column (stored +1, 0 == absent) */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* Column not present in the assembled pattern and new nonzeros are allowed:
                   disassemble back to global column numbering so it can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                /* MatDisAssemble_MPIAIJ replaced aij->B, so every cached B-related pointer is stale */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B still uses global column numbering */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another process: stash the whole row (or column slice) for assembly-time communication */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ