#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`;
   the type also automatically switches over to using inodes when enough of them exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.
   As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
   at a slightly higher hash table cost; without it it is not scalable
   (each process stores an integer array whose length is the global number
   of columns), but access is fast.
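   For example, if garray = {3, 7, 12} then global column 7 corresponds to local
   column 1 of B; the map stores the local index shifted by one (so column 7 maps
   to 2), which lets a lookup result of 0 mean "column not present in the
   off-diagonal part".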
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj ? aj + ai[row] : NULL;
      ap1      = aa ? aa + ai[row] : NULL;
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj ? bj + bi[row] : NULL;
      ap2      = ba ? ba + bi[row] : NULL;
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v ? v + i * n : NULL, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v ? v + i : NULL, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
   The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
   No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
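   This "symbolic" pass only fills the column-index (j) and row-length (ilen) arrays of the two
   sequential blocks; the numerical values are copied later by MatSetValues_MPIAIJ_CopyFromCSRFormat().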
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
   The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
   No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
   Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
   would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
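  /* Drain the stash first: receive the off-process entries that other ranks queued during
     MatSetValues() and insert them locally with MatSetValues_MPIAIJ(), then assemble the
     diagonal (A) and off-diagonal (B) blocks. */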
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix.
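     Each rank extracts A(me, notme) and B(notme, me) as sequential matrices and checks that
     these two off-diagonal pieces are transposes of each other within the given tolerance.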
  */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
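  /* binary header layout: [MAT_FILE_CLASSID, global rows M, global cols N, total nonzeros (clamped below to PETSC_MAX_INT)] */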
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;
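    /* rank 0 asks for every row and column while the other ranks ask for none, so the
       MatCreateSubMatrix() call below gathers the whole matrix onto the first process */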
iscol; 1344 1345 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1346 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1347 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1348 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1349 /* The commented code uses MatCreateSubMatrices instead */ 1350 /* 1351 Mat *AA, A = NULL, Av; 1352 IS isrow,iscol; 1353 1354 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1355 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1356 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1357 if (rank == 0) { 1358 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1359 A = AA[0]; 1360 Av = AA[0]; 1361 } 1362 PetscCall(MatDestroySubMatrices(1,&AA)); 1363 */ 1364 PetscCall(ISDestroy(&iscol)); 1365 PetscCall(ISDestroy(&isrow)); 1366 /* 1367 Everyone has to call to draw the matrix since the graphics waits are 1368 synchronized across all processors that share the PetscDraw object 1369 */ 1370 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1371 if (rank == 0) { 1372 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1373 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1374 } 1375 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1376 PetscCall(PetscViewerFlush(viewer)); 1377 PetscCall(MatDestroy(&A)); 1378 } 1379 PetscFunctionReturn(PETSC_SUCCESS); 1380 } 1381 1382 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1383 { 1384 PetscBool iascii, isdraw, issocket, isbinary; 1385 1386 PetscFunctionBegin; 1387 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1388 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1389 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1390 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1391 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1396 { 1397 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1398 Vec bb1 = NULL; 1399 PetscBool hasop; 1400 1401 PetscFunctionBegin; 1402 if (flag == SOR_APPLY_UPPER) { 1403 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1408 1409 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1410 if (flag & SOR_ZERO_INITIAL_GUESS) { 1411 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1412 its--; 1413 } 1414 1415 while (its--) { 1416 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1417 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1418 1419 /* update rhs: bb1 = bb - B*x */ 1420 PetscCall(VecScale(mat->lvec, -1.0)); 1421 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1422 1423 /* local sweep */ 1424 
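/* the sweep below acts on the diagonal block mat->A only, using the right-hand side bb1 that already folds in the off-process coupling through mat->B */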
PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1425 } 1426 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1427 if (flag & SOR_ZERO_INITIAL_GUESS) { 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1429 its--; 1430 } 1431 while (its--) { 1432 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1433 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1434 1435 /* update rhs: bb1 = bb - B*x */ 1436 PetscCall(VecScale(mat->lvec, -1.0)); 1437 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1438 1439 /* local sweep */ 1440 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1441 } 1442 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1443 if (flag & SOR_ZERO_INITIAL_GUESS) { 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1445 its--; 1446 } 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec, -1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1457 } 1458 } else if (flag & SOR_EISENSTAT) { 1459 Vec xx1; 1460 1461 PetscCall(VecDuplicate(bb, &xx1)); 1462 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1463 1464 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1466 if (!mat->diag) { 1467 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1468 PetscCall(MatGetDiagonal(matin, mat->diag)); 1469 } 1470 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1471 if (hasop) { 1472 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1473 } else { 1474 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1475 } 1476 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1477 1478 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1479 1480 /* local sweep */ 1481 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1482 PetscCall(VecAXPY(xx, 1.0, xx1)); 1483 PetscCall(VecDestroy(&xx1)); 1484 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1485 1486 PetscCall(VecDestroy(&bb1)); 1487 1488 matin->factorerrortype = mat->A->factorerrortype; 1489 PetscFunctionReturn(PETSC_SUCCESS); 1490 } 1491 1492 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1493 { 1494 Mat aA, aB, Aperm; 1495 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1496 PetscScalar *aa, *ba; 1497 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1498 PetscSF rowsf, sf; 1499 IS parcolp = NULL; 1500 PetscBool done; 1501 1502 PetscFunctionBegin; 1503 PetscCall(MatGetLocalSize(A, &m, &n)); 1504 PetscCall(ISGetIndices(rowp, &rwant)); 1505 PetscCall(ISGetIndices(colp, &cwant)); 1506 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1507 1508 /* Invert row permutation to find out where my rows should go 
*/ 1509 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1510 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1511 PetscCall(PetscSFSetFromOptions(rowsf)); 1512 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1513 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1514 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1515 1516 /* Invert column permutation to find out where my columns should go */ 1517 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1518 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1519 PetscCall(PetscSFSetFromOptions(sf)); 1520 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1521 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1522 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1523 PetscCall(PetscSFDestroy(&sf)); 1524 1525 PetscCall(ISRestoreIndices(rowp, &rwant)); 1526 PetscCall(ISRestoreIndices(colp, &cwant)); 1527 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1528 1529 /* Find out where my gcols should go */ 1530 PetscCall(MatGetSize(aB, NULL, &ng)); 1531 PetscCall(PetscMalloc1(ng, &gcdest)); 1532 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1533 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1534 PetscCall(PetscSFSetFromOptions(sf)); 1535 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1536 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1537 PetscCall(PetscSFDestroy(&sf)); 1538 1539 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1540 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1541 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1542 for (i = 0; i < m; i++) { 1543 PetscInt row = rdest[i]; 1544 PetscMPIInt rowner; 1545 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1546 for (j = ai[i]; j < ai[i + 1]; j++) { 1547 PetscInt col = cdest[aj[j]]; 1548 PetscMPIInt cowner; 1549 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1550 if (rowner == cowner) dnnz[i]++; 1551 else onnz[i]++; 1552 } 1553 for (j = bi[i]; j < bi[i + 1]; j++) { 1554 PetscInt col = gcdest[bj[j]]; 1555 PetscMPIInt cowner; 1556 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1557 if (rowner == cowner) dnnz[i]++; 1558 else onnz[i]++; 1559 } 1560 } 1561 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1562 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1563 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1564 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1565 PetscCall(PetscSFDestroy(&rowsf)); 1566 1567 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1568 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1569 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1570 for (i = 0; i < m; i++) { 1571 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1572 PetscInt j0, rowlen; 1573 rowlen = ai[i + 1] - ai[i]; 1574 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1575 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1576 
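/* acols[] now holds one batch (at most m entries) of permuted global column indices; insert the matching values of original row i into row rdest[i] of Aperm */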
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1577 } 1578 rowlen = bi[i + 1] - bi[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { 1580 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1582 } 1583 } 1584 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1585 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1586 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1587 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1588 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1589 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1590 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1591 PetscCall(PetscFree3(work, rdest, cdest)); 1592 PetscCall(PetscFree(gcdest)); 1593 if (parcolp) PetscCall(ISDestroy(&colp)); 1594 *B = Aperm; 1595 PetscFunctionReturn(PETSC_SUCCESS); 1596 } 1597 1598 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1599 { 1600 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1601 1602 PetscFunctionBegin; 1603 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1604 if (ghosts) *ghosts = aij->garray; 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1609 { 1610 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1611 Mat A = mat->A, B = mat->B; 1612 PetscLogDouble isend[5], irecv[5]; 1613 1614 PetscFunctionBegin; 1615 info->block_size = 1.0; 1616 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1617 1618 isend[0] = info->nz_used; 1619 isend[1] = info->nz_allocated; 1620 isend[2] = info->nz_unneeded; 1621 isend[3] = info->memory; 1622 isend[4] = info->mallocs; 1623 1624 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1625 1626 isend[0] += info->nz_used; 1627 isend[1] += info->nz_allocated; 1628 isend[2] += info->nz_unneeded; 1629 isend[3] += info->memory; 1630 isend[4] += info->mallocs; 1631 if (flag == MAT_LOCAL) { 1632 info->nz_used = isend[0]; 1633 info->nz_allocated = isend[1]; 1634 info->nz_unneeded = isend[2]; 1635 info->memory = isend[3]; 1636 info->mallocs = isend[4]; 1637 } else if (flag == MAT_GLOBAL_MAX) { 1638 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1639 1640 info->nz_used = irecv[0]; 1641 info->nz_allocated = irecv[1]; 1642 info->nz_unneeded = irecv[2]; 1643 info->memory = irecv[3]; 1644 info->mallocs = irecv[4]; 1645 } else if (flag == MAT_GLOBAL_SUM) { 1646 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1647 1648 info->nz_used = irecv[0]; 1649 info->nz_allocated = irecv[1]; 1650 info->nz_unneeded = irecv[2]; 1651 info->memory = irecv[3]; 1652 info->mallocs = irecv[4]; 1653 } 1654 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1655 info->fill_ratio_needed = 0; 1656 info->factor_mallocs = 0; 1657 PetscFunctionReturn(PETSC_SUCCESS); 1658 } 1659 1660 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1661 { 1662 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1663 1664 PetscFunctionBegin; 1665 switch (op) { 1666 case MAT_NEW_NONZERO_LOCATIONS: 1667 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1668 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1669 case MAT_KEEP_NONZERO_PATTERN: 1670 case MAT_NEW_NONZERO_LOCATION_ERR: 1671 case MAT_USE_INODES: 1672 case MAT_IGNORE_ZERO_ENTRIES: 1673 case 
MAT_FORM_EXPLICIT_TRANSPOSE: 1674 MatCheckPreallocated(A, 1); 1675 PetscCall(MatSetOption(a->A, op, flg)); 1676 PetscCall(MatSetOption(a->B, op, flg)); 1677 break; 1678 case MAT_ROW_ORIENTED: 1679 MatCheckPreallocated(A, 1); 1680 a->roworiented = flg; 1681 1682 PetscCall(MatSetOption(a->A, op, flg)); 1683 PetscCall(MatSetOption(a->B, op, flg)); 1684 break; 1685 case MAT_FORCE_DIAGONAL_ENTRIES: 1686 case MAT_SORTED_FULL: 1687 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1688 break; 1689 case MAT_IGNORE_OFF_PROC_ENTRIES: 1690 a->donotstash = flg; 1691 break; 1692 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1693 case MAT_SPD: 1694 case MAT_SYMMETRIC: 1695 case MAT_STRUCTURALLY_SYMMETRIC: 1696 case MAT_HERMITIAN: 1697 case MAT_SYMMETRY_ETERNAL: 1698 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1699 case MAT_SPD_ETERNAL: 1700 /* if the diagonal matrix is square it inherits some of the properties above */ 1701 break; 1702 case MAT_SUBMAT_SINGLEIS: 1703 A->submat_singleis = flg; 1704 break; 1705 case MAT_STRUCTURE_ONLY: 1706 /* The option is handled directly by MatSetOption() */ 1707 break; 1708 default: 1709 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1710 } 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1715 { 1716 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1717 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1718 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1719 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1720 PetscInt *cmap, *idx_p; 1721 1722 PetscFunctionBegin; 1723 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1724 mat->getrowactive = PETSC_TRUE; 1725 1726 if (!mat->rowvalues && (idx || v)) { 1727 /* 1728 allocate enough space to hold information from the longest row. 
1729 */ 1730 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1731 PetscInt max = 1, tmp; 1732 for (i = 0; i < matin->rmap->n; i++) { 1733 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1734 if (max < tmp) max = tmp; 1735 } 1736 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1737 } 1738 1739 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1740 lrow = row - rstart; 1741 1742 pvA = &vworkA; 1743 pcA = &cworkA; 1744 pvB = &vworkB; 1745 pcB = &cworkB; 1746 if (!v) { 1747 pvA = NULL; 1748 pvB = NULL; 1749 } 1750 if (!idx) { 1751 pcA = NULL; 1752 if (!v) pcB = NULL; 1753 } 1754 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1755 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1756 nztot = nzA + nzB; 1757 1758 cmap = mat->garray; 1759 if (v || idx) { 1760 if (nztot) { 1761 /* Sort by increasing column numbers, assuming A and B already sorted */ 1762 PetscInt imark = -1; 1763 if (v) { 1764 *v = v_p = mat->rowvalues; 1765 for (i = 0; i < nzB; i++) { 1766 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1767 else break; 1768 } 1769 imark = i; 1770 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1771 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1772 } 1773 if (idx) { 1774 *idx = idx_p = mat->rowindices; 1775 if (imark > -1) { 1776 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1777 } else { 1778 for (i = 0; i < nzB; i++) { 1779 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1780 else break; 1781 } 1782 imark = i; 1783 } 1784 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1785 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1786 } 1787 } else { 1788 if (idx) *idx = NULL; 1789 if (v) *v = NULL; 1790 } 1791 } 1792 *nz = nztot; 1793 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1794 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1795 PetscFunctionReturn(PETSC_SUCCESS); 1796 } 1797 1798 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1799 { 1800 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1801 1802 PetscFunctionBegin; 1803 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1804 aij->getrowactive = PETSC_FALSE; 1805 PetscFunctionReturn(PETSC_SUCCESS); 1806 } 1807 1808 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1809 { 1810 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1811 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1812 PetscInt i, j, cstart = mat->cmap->rstart; 1813 PetscReal sum = 0.0; 1814 const MatScalar *v, *amata, *bmata; 1815 1816 PetscFunctionBegin; 1817 if (aij->size == 1) { 1818 PetscCall(MatNorm(aij->A, type, norm)); 1819 } else { 1820 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1821 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1822 if (type == NORM_FROBENIUS) { 1823 v = amata; 1824 for (i = 0; i < amat->nz; i++) { 1825 sum += PetscRealPart(PetscConj(*v) * (*v)); 1826 v++; 1827 } 1828 v = bmata; 1829 for (i = 0; i < bmat->nz; i++) { 1830 sum += PetscRealPart(PetscConj(*v) * (*v)); 1831 v++; 1832 } 1833 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1834 *norm = PetscSqrtReal(*norm); 1835 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1836 } else if (type == NORM_1) { /* max 
column norm */ 1837 PetscReal *tmp, *tmp2; 1838 PetscInt *jj, *garray = aij->garray; 1839 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1840 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1841 *norm = 0.0; 1842 v = amata; 1843 jj = amat->j; 1844 for (j = 0; j < amat->nz; j++) { 1845 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1846 v++; 1847 } 1848 v = bmata; 1849 jj = bmat->j; 1850 for (j = 0; j < bmat->nz; j++) { 1851 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1852 v++; 1853 } 1854 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1855 for (j = 0; j < mat->cmap->N; j++) { 1856 if (tmp2[j] > *norm) *norm = tmp2[j]; 1857 } 1858 PetscCall(PetscFree(tmp)); 1859 PetscCall(PetscFree(tmp2)); 1860 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1861 } else if (type == NORM_INFINITY) { /* max row norm */ 1862 PetscReal ntemp = 0.0; 1863 for (j = 0; j < aij->A->rmap->n; j++) { 1864 v = amata + amat->i[j]; 1865 sum = 0.0; 1866 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1867 sum += PetscAbsScalar(*v); 1868 v++; 1869 } 1870 v = bmata + bmat->i[j]; 1871 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1872 sum += PetscAbsScalar(*v); 1873 v++; 1874 } 1875 if (sum > ntemp) ntemp = sum; 1876 } 1877 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1878 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1879 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1880 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1881 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1882 } 1883 PetscFunctionReturn(PETSC_SUCCESS); 1884 } 1885 1886 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1887 { 1888 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1889 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1890 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1891 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1892 Mat B, A_diag, *B_diag; 1893 const MatScalar *pbv, *bv; 1894 1895 PetscFunctionBegin; 1896 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1897 ma = A->rmap->n; 1898 na = A->cmap->n; 1899 mb = a->B->rmap->n; 1900 nb = a->B->cmap->n; 1901 ai = Aloc->i; 1902 aj = Aloc->j; 1903 bi = Bloc->i; 1904 bj = Bloc->j; 1905 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1906 PetscInt *d_nnz, *g_nnz, *o_nnz; 1907 PetscSFNode *oloc; 1908 PETSC_UNUSED PetscSF sf; 1909 1910 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1911 /* compute d_nnz for preallocation */ 1912 PetscCall(PetscArrayzero(d_nnz, na)); 1913 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1914 /* compute local off-diagonal contributions */ 1915 PetscCall(PetscArrayzero(g_nnz, nb)); 1916 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1917 /* map those to global */ 1918 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1919 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1920 PetscCall(PetscSFSetFromOptions(sf)); 1921 PetscCall(PetscArrayzero(o_nnz, na)); 1922 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1923 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1924 PetscCall(PetscSFDestroy(&sf)); 1925 1926 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1927 
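/* the transpose exchanges row and column layouts: B gets A's local/global column sizes as its row sizes and vice versa */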
PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1928 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1929 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1930 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1931 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1932 } else { 1933 B = *matout; 1934 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1935 } 1936 1937 b = (Mat_MPIAIJ *)B->data; 1938 A_diag = a->A; 1939 B_diag = &b->A; 1940 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1941 A_diag_ncol = A_diag->cmap->N; 1942 B_diag_ilen = sub_B_diag->ilen; 1943 B_diag_i = sub_B_diag->i; 1944 1945 /* Set ilen for diagonal of B */ 1946 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1947 1948 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1949 very quickly (=without using MatSetValues), because all writes are local. */ 1950 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1951 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1952 1953 /* copy over the B part */ 1954 PetscCall(PetscMalloc1(bi[mb], &cols)); 1955 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1956 pbv = bv; 1957 row = A->rmap->rstart; 1958 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1959 cols_tmp = cols; 1960 for (i = 0; i < mb; i++) { 1961 ncol = bi[i + 1] - bi[i]; 1962 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1963 row++; 1964 if (pbv) pbv += ncol; 1965 if (cols_tmp) cols_tmp += ncol; 1966 } 1967 PetscCall(PetscFree(cols)); 1968 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1969 1970 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1971 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1972 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1973 *matout = B; 1974 } else { 1975 PetscCall(MatHeaderMerge(A, &B)); 1976 } 1977 PetscFunctionReturn(PETSC_SUCCESS); 1978 } 1979 1980 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1981 { 1982 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1983 Mat a = aij->A, b = aij->B; 1984 PetscInt s1, s2, s3; 1985 1986 PetscFunctionBegin; 1987 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1988 if (rr) { 1989 PetscCall(VecGetLocalSize(rr, &s1)); 1990 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1991 /* Overlap communication with computation. 
*/ 1992 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1993 } 1994 if (ll) { 1995 PetscCall(VecGetLocalSize(ll, &s1)); 1996 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1997 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1998 } 1999 /* scale the diagonal block */ 2000 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2001 2002 if (rr) { 2003 /* Do a scatter end and then right scale the off-diagonal block */ 2004 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2005 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2006 } 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2011 { 2012 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2013 2014 PetscFunctionBegin; 2015 PetscCall(MatSetUnfactored(a->A)); 2016 PetscFunctionReturn(PETSC_SUCCESS); 2017 } 2018 2019 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2020 { 2021 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2022 Mat a, b, c, d; 2023 PetscBool flg; 2024 2025 PetscFunctionBegin; 2026 a = matA->A; 2027 b = matA->B; 2028 c = matB->A; 2029 d = matB->B; 2030 2031 PetscCall(MatEqual(a, c, &flg)); 2032 if (flg) PetscCall(MatEqual(b, d, &flg)); 2033 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2034 PetscFunctionReturn(PETSC_SUCCESS); 2035 } 2036 2037 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2038 { 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2040 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2041 2042 PetscFunctionBegin; 2043 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2044 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2045 /* because of the column compression in the off-processor part of the matrix a->B, 2046 the number of columns in a->B and b->B may be different, hence we cannot call 2047 the MatCopy() directly on the two parts. If need be, we can provide a more 2048 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2049 then copying the submatrices */ 2050 PetscCall(MatCopy_Basic(A, B, str)); 2051 } else { 2052 PetscCall(MatCopy(a->A, b->A, str)); 2053 PetscCall(MatCopy(a->B, b->B, str)); 2054 } 2055 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2056 PetscFunctionReturn(PETSC_SUCCESS); 2057 } 2058 2059 /* 2060 Computes the number of nonzeros per row needed for preallocation when X and Y 2061 have different nonzero structure. 
2062 */ 2063 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2064 { 2065 PetscInt i, j, k, nzx, nzy; 2066 2067 PetscFunctionBegin; 2068 /* Set the number of nonzeros in the new matrix */ 2069 for (i = 0; i < m; i++) { 2070 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2071 nzx = xi[i + 1] - xi[i]; 2072 nzy = yi[i + 1] - yi[i]; 2073 nnz[i] = 0; 2074 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2075 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2076 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2077 nnz[i]++; 2078 } 2079 for (; k < nzy; k++) nnz[i]++; 2080 } 2081 PetscFunctionReturn(PETSC_SUCCESS); 2082 } 2083 2084 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2085 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2086 { 2087 PetscInt m = Y->rmap->N; 2088 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2089 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2090 2091 PetscFunctionBegin; 2092 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2093 PetscFunctionReturn(PETSC_SUCCESS); 2094 } 2095 2096 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2097 { 2098 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2099 2100 PetscFunctionBegin; 2101 if (str == SAME_NONZERO_PATTERN) { 2102 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2103 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2104 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2105 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2106 } else { 2107 Mat B; 2108 PetscInt *nnz_d, *nnz_o; 2109 2110 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2111 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2112 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2113 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2114 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2115 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2116 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2117 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2118 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2119 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2120 PetscCall(MatHeaderMerge(Y, &B)); 2121 PetscCall(PetscFree(nnz_d)); 2122 PetscCall(PetscFree(nnz_o)); 2123 } 2124 PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2128 2129 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2130 { 2131 PetscFunctionBegin; 2132 if (PetscDefined(USE_COMPLEX)) { 2133 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2134 2135 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2136 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2137 } 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCall(MatRealPart(a->A)); 2147 PetscCall(MatRealPart(a->B)); 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2154 2155 PetscFunctionBegin; 2156 
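/* keep only the imaginary part of the values in both local blocks, the diagonal block a->A and the off-diagonal block a->B */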
PetscCall(MatImaginaryPart(a->A)); 2157 PetscCall(MatImaginaryPart(a->B)); 2158 PetscFunctionReturn(PETSC_SUCCESS); 2159 } 2160 2161 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2162 { 2163 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2164 PetscInt i, *idxb = NULL, m = A->rmap->n; 2165 PetscScalar *va, *vv; 2166 Vec vB, vA; 2167 const PetscScalar *vb; 2168 2169 PetscFunctionBegin; 2170 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2171 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2172 2173 PetscCall(VecGetArrayWrite(vA, &va)); 2174 if (idx) { 2175 for (i = 0; i < m; i++) { 2176 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2177 } 2178 } 2179 2180 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2181 PetscCall(PetscMalloc1(m, &idxb)); 2182 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2183 2184 PetscCall(VecGetArrayWrite(v, &vv)); 2185 PetscCall(VecGetArrayRead(vB, &vb)); 2186 for (i = 0; i < m; i++) { 2187 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2188 vv[i] = vb[i]; 2189 if (idx) idx[i] = a->garray[idxb[i]]; 2190 } else { 2191 vv[i] = va[i]; 2192 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2193 } 2194 } 2195 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2196 PetscCall(VecRestoreArrayWrite(vA, &va)); 2197 PetscCall(VecRestoreArrayRead(vB, &vb)); 2198 PetscCall(PetscFree(idxb)); 2199 PetscCall(VecDestroy(&vA)); 2200 PetscCall(VecDestroy(&vB)); 2201 PetscFunctionReturn(PETSC_SUCCESS); 2202 } 2203 2204 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2205 { 2206 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2207 PetscInt m = A->rmap->n, n = A->cmap->n; 2208 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2209 PetscInt *cmap = mat->garray; 2210 PetscInt *diagIdx, *offdiagIdx; 2211 Vec diagV, offdiagV; 2212 PetscScalar *a, *diagA, *offdiagA; 2213 const PetscScalar *ba, *bav; 2214 PetscInt r, j, col, ncols, *bi, *bj; 2215 Mat B = mat->B; 2216 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2217 2218 PetscFunctionBegin; 2219 /* When a process holds entire A and other processes have no entry */ 2220 if (A->cmap->N == n) { 2221 PetscCall(VecGetArrayWrite(v, &diagA)); 2222 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2223 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2224 PetscCall(VecDestroy(&diagV)); 2225 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2226 PetscFunctionReturn(PETSC_SUCCESS); 2227 } else if (n == 0) { 2228 if (m) { 2229 PetscCall(VecGetArrayWrite(v, &a)); 2230 for (r = 0; r < m; r++) { 2231 a[r] = 0.0; 2232 if (idx) idx[r] = -1; 2233 } 2234 PetscCall(VecRestoreArrayWrite(v, &a)); 2235 } 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2240 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2241 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2242 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2243 2244 /* Get offdiagIdx[] for implicit 0.0 */ 2245 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2246 ba = bav; 2247 bi = b->i; 2248 bj = b->j; 2249 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2250 for (r = 0; r < m; r++) { 2251 ncols = bi[r + 1] - bi[r]; 2252 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2253 offdiagA[r] = *ba; 2254 offdiagIdx[r] = cmap[0]; 2255 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2256 offdiagA[r] = 0.0; 2257 2258 /* Find first hole in the cmap */ 2259 for (j = 0; j < ncols; 
j++) { 2260 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2261 if (col > j && j < cstart) { 2262 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2263 break; 2264 } else if (col > j + n && j >= cstart) { 2265 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2266 break; 2267 } 2268 } 2269 if (j == ncols && ncols < A->cmap->N - n) { 2270 /* a hole is outside compressed Bcols */ 2271 if (ncols == 0) { 2272 if (cstart) { 2273 offdiagIdx[r] = 0; 2274 } else offdiagIdx[r] = cend; 2275 } else { /* ncols > 0 */ 2276 offdiagIdx[r] = cmap[ncols - 1] + 1; 2277 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2278 } 2279 } 2280 } 2281 2282 for (j = 0; j < ncols; j++) { 2283 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2284 offdiagA[r] = *ba; 2285 offdiagIdx[r] = cmap[*bj]; 2286 } 2287 ba++; 2288 bj++; 2289 } 2290 } 2291 2292 PetscCall(VecGetArrayWrite(v, &a)); 2293 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2294 for (r = 0; r < m; ++r) { 2295 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2296 a[r] = diagA[r]; 2297 if (idx) idx[r] = cstart + diagIdx[r]; 2298 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2299 a[r] = diagA[r]; 2300 if (idx) { 2301 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2302 idx[r] = cstart + diagIdx[r]; 2303 } else idx[r] = offdiagIdx[r]; 2304 } 2305 } else { 2306 a[r] = offdiagA[r]; 2307 if (idx) idx[r] = offdiagIdx[r]; 2308 } 2309 } 2310 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2311 PetscCall(VecRestoreArrayWrite(v, &a)); 2312 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2313 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2314 PetscCall(VecDestroy(&diagV)); 2315 PetscCall(VecDestroy(&offdiagV)); 2316 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2317 PetscFunctionReturn(PETSC_SUCCESS); 2318 } 2319 2320 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2321 { 2322 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2323 PetscInt m = A->rmap->n, n = A->cmap->n; 2324 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2325 PetscInt *cmap = mat->garray; 2326 PetscInt *diagIdx, *offdiagIdx; 2327 Vec diagV, offdiagV; 2328 PetscScalar *a, *diagA, *offdiagA; 2329 const PetscScalar *ba, *bav; 2330 PetscInt r, j, col, ncols, *bi, *bj; 2331 Mat B = mat->B; 2332 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2333 2334 PetscFunctionBegin; 2335 /* When a process holds entire A and other processes have no entry */ 2336 if (A->cmap->N == n) { 2337 PetscCall(VecGetArrayWrite(v, &diagA)); 2338 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2339 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2340 PetscCall(VecDestroy(&diagV)); 2341 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2342 PetscFunctionReturn(PETSC_SUCCESS); 2343 } else if (n == 0) { 2344 if (m) { 2345 PetscCall(VecGetArrayWrite(v, &a)); 2346 for (r = 0; r < m; r++) { 2347 a[r] = PETSC_MAX_REAL; 2348 if (idx) idx[r] = -1; 2349 } 2350 PetscCall(VecRestoreArrayWrite(v, &a)); 2351 } 2352 PetscFunctionReturn(PETSC_SUCCESS); 2353 } 2354 2355 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2357 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2358 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2359 2360 /* Get offdiagIdx[] for implicit 0.0 */ 2361 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2362 ba = bav; 2363 bi = b->i; 2364 bj = b->j; 2365 
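/* scan the compressed off-diagonal block row by row; a row with fewer than cmap->N - n stored entries also has implicit zeros, so the search below first locates the first hole in cmap before comparing the explicit values */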
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2366 for (r = 0; r < m; r++) { 2367 ncols = bi[r + 1] - bi[r]; 2368 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2369 offdiagA[r] = *ba; 2370 offdiagIdx[r] = cmap[0]; 2371 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2372 offdiagA[r] = 0.0; 2373 2374 /* Find first hole in the cmap */ 2375 for (j = 0; j < ncols; j++) { 2376 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2377 if (col > j && j < cstart) { 2378 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2379 break; 2380 } else if (col > j + n && j >= cstart) { 2381 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2382 break; 2383 } 2384 } 2385 if (j == ncols && ncols < A->cmap->N - n) { 2386 /* a hole is outside compressed Bcols */ 2387 if (ncols == 0) { 2388 if (cstart) { 2389 offdiagIdx[r] = 0; 2390 } else offdiagIdx[r] = cend; 2391 } else { /* ncols > 0 */ 2392 offdiagIdx[r] = cmap[ncols - 1] + 1; 2393 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2394 } 2395 } 2396 } 2397 2398 for (j = 0; j < ncols; j++) { 2399 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2400 offdiagA[r] = *ba; 2401 offdiagIdx[r] = cmap[*bj]; 2402 } 2403 ba++; 2404 bj++; 2405 } 2406 } 2407 2408 PetscCall(VecGetArrayWrite(v, &a)); 2409 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2410 for (r = 0; r < m; ++r) { 2411 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2412 a[r] = diagA[r]; 2413 if (idx) idx[r] = cstart + diagIdx[r]; 2414 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2415 a[r] = diagA[r]; 2416 if (idx) { 2417 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2418 idx[r] = cstart + diagIdx[r]; 2419 } else idx[r] = offdiagIdx[r]; 2420 } 2421 } else { 2422 a[r] = offdiagA[r]; 2423 if (idx) idx[r] = offdiagIdx[r]; 2424 } 2425 } 2426 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2427 PetscCall(VecRestoreArrayWrite(v, &a)); 2428 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2429 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2430 PetscCall(VecDestroy(&diagV)); 2431 PetscCall(VecDestroy(&offdiagV)); 2432 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2433 PetscFunctionReturn(PETSC_SUCCESS); 2434 } 2435 2436 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2437 { 2438 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2439 PetscInt m = A->rmap->n, n = A->cmap->n; 2440 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2441 PetscInt *cmap = mat->garray; 2442 PetscInt *diagIdx, *offdiagIdx; 2443 Vec diagV, offdiagV; 2444 PetscScalar *a, *diagA, *offdiagA; 2445 const PetscScalar *ba, *bav; 2446 PetscInt r, j, col, ncols, *bi, *bj; 2447 Mat B = mat->B; 2448 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2449 2450 PetscFunctionBegin; 2451 /* When a process holds entire A and other processes have no entry */ 2452 if (A->cmap->N == n) { 2453 PetscCall(VecGetArrayWrite(v, &diagA)); 2454 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2455 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2456 PetscCall(VecDestroy(&diagV)); 2457 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2458 PetscFunctionReturn(PETSC_SUCCESS); 2459 } else if (n == 0) { 2460 if (m) { 2461 PetscCall(VecGetArrayWrite(v, &a)); 2462 for (r = 0; r < m; r++) { 2463 a[r] = PETSC_MIN_REAL; 2464 if (idx) idx[r] = -1; 2465 } 2466 PetscCall(VecRestoreArrayWrite(v, &a)); 2467 } 2468 PetscFunctionReturn(PETSC_SUCCESS); 2469 } 2470 2471 
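/* general case: compute row maxima of the diagonal and off-diagonal blocks separately and merge them below; on ties the smaller global column index is reported */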
PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2472 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2473 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2474 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2475 2476 /* Get offdiagIdx[] for implicit 0.0 */ 2477 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2478 ba = bav; 2479 bi = b->i; 2480 bj = b->j; 2481 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2482 for (r = 0; r < m; r++) { 2483 ncols = bi[r + 1] - bi[r]; 2484 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2485 offdiagA[r] = *ba; 2486 offdiagIdx[r] = cmap[0]; 2487 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2488 offdiagA[r] = 0.0; 2489 2490 /* Find first hole in the cmap */ 2491 for (j = 0; j < ncols; j++) { 2492 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2493 if (col > j && j < cstart) { 2494 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2495 break; 2496 } else if (col > j + n && j >= cstart) { 2497 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2498 break; 2499 } 2500 } 2501 if (j == ncols && ncols < A->cmap->N - n) { 2502 /* a hole is outside compressed Bcols */ 2503 if (ncols == 0) { 2504 if (cstart) { 2505 offdiagIdx[r] = 0; 2506 } else offdiagIdx[r] = cend; 2507 } else { /* ncols > 0 */ 2508 offdiagIdx[r] = cmap[ncols - 1] + 1; 2509 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2510 } 2511 } 2512 } 2513 2514 for (j = 0; j < ncols; j++) { 2515 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2516 offdiagA[r] = *ba; 2517 offdiagIdx[r] = cmap[*bj]; 2518 } 2519 ba++; 2520 bj++; 2521 } 2522 } 2523 2524 PetscCall(VecGetArrayWrite(v, &a)); 2525 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2526 for (r = 0; r < m; ++r) { 2527 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2528 a[r] = diagA[r]; 2529 if (idx) idx[r] = cstart + diagIdx[r]; 2530 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2531 a[r] = diagA[r]; 2532 if (idx) { 2533 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2534 idx[r] = cstart + diagIdx[r]; 2535 } else idx[r] = offdiagIdx[r]; 2536 } 2537 } else { 2538 a[r] = offdiagA[r]; 2539 if (idx) idx[r] = offdiagIdx[r]; 2540 } 2541 } 2542 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2543 PetscCall(VecRestoreArrayWrite(v, &a)); 2544 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2545 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2546 PetscCall(VecDestroy(&diagV)); 2547 PetscCall(VecDestroy(&offdiagV)); 2548 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2549 PetscFunctionReturn(PETSC_SUCCESS); 2550 } 2551 2552 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2553 { 2554 Mat *dummy; 2555 2556 PetscFunctionBegin; 2557 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2558 *newmat = *dummy; 2559 PetscCall(PetscFree(dummy)); 2560 PetscFunctionReturn(PETSC_SUCCESS); 2561 } 2562 2563 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2566 2567 PetscFunctionBegin; 2568 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2569 A->factorerrortype = a->A->factorerrortype; 2570 PetscFunctionReturn(PETSC_SUCCESS); 2571 } 2572 2573 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2574 { 2575 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2576 2577 PetscFunctionBegin; 2578 PetscCheck(x->assembled || x->preallocated, 
PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2579 PetscCall(MatSetRandom(aij->A, rctx)); 2580 if (x->assembled) { 2581 PetscCall(MatSetRandom(aij->B, rctx)); 2582 } else { 2583 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2584 } 2585 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2586 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2587 PetscFunctionReturn(PETSC_SUCCESS); 2588 } 2589 2590 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2591 { 2592 PetscFunctionBegin; 2593 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2594 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2595 PetscFunctionReturn(PETSC_SUCCESS); 2596 } 2597 2598 /*@ 2599 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2600 2601 Not Collective 2602 2603 Input Parameter: 2604 . A - the matrix 2605 2606 Output Parameter: 2607 . nz - the number of nonzeros 2608 2609 Level: advanced 2610 2611 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2612 @*/ 2613 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2614 { 2615 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2616 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2617 PetscBool isaij; 2618 2619 PetscFunctionBegin; 2620 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2621 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2622 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2623 PetscFunctionReturn(PETSC_SUCCESS); 2624 } 2625 2626 /*@ 2627 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2628 2629 Collective 2630 2631 Input Parameters: 2632 + A - the matrix 2633 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2634 2635 Level: advanced 2636 2637 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2638 @*/ 2639 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2640 { 2641 PetscFunctionBegin; 2642 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2643 PetscFunctionReturn(PETSC_SUCCESS); 2644 } 2645 2646 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2647 { 2648 PetscBool sc = PETSC_FALSE, flg; 2649 2650 PetscFunctionBegin; 2651 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2652 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2653 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2654 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2655 PetscOptionsHeadEnd(); 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 2658 2659 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2660 { 2661 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2662 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2663 2664 PetscFunctionBegin; 2665 if (!Y->preallocated) { 2666 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2667 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2668 PetscInt nonew = aij->nonew; 2669 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2670 aij->nonew = nonew; 2671 } 2672 PetscCall(MatShift_Basic(Y, a)); 2673 PetscFunctionReturn(PETSC_SUCCESS); 2674 } 2675 2676 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2677 { 2678 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2679 2680 PetscFunctionBegin; 2681 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2682 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2683 if (d) { 2684 PetscInt rstart; 2685 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2686 *d += rstart; 2687 } 2688 PetscFunctionReturn(PETSC_SUCCESS); 2689 } 2690 2691 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2694 2695 PetscFunctionBegin; 2696 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2697 PetscFunctionReturn(PETSC_SUCCESS); 2698 } 2699 2700 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2701 { 2702 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2703 2704 PetscFunctionBegin; 2705 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2706 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2707 PetscFunctionReturn(PETSC_SUCCESS); 2708 } 2709 2710 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2711 MatGetRow_MPIAIJ, 2712 MatRestoreRow_MPIAIJ, 2713 MatMult_MPIAIJ, 2714 /* 4*/ MatMultAdd_MPIAIJ, 2715 MatMultTranspose_MPIAIJ, 2716 MatMultTransposeAdd_MPIAIJ, 2717 NULL, 2718 NULL, 2719 NULL, 2720 /*10*/ NULL, 2721 NULL, 2722 NULL, 2723 MatSOR_MPIAIJ, 2724 MatTranspose_MPIAIJ, 2725 /*15*/ MatGetInfo_MPIAIJ, 2726 MatEqual_MPIAIJ, 2727 MatGetDiagonal_MPIAIJ, 2728 MatDiagonalScale_MPIAIJ, 2729 MatNorm_MPIAIJ, 2730 /*20*/ MatAssemblyBegin_MPIAIJ, 2731 MatAssemblyEnd_MPIAIJ, 2732 MatSetOption_MPIAIJ, 2733 MatZeroEntries_MPIAIJ, 2734 /*24*/ MatZeroRows_MPIAIJ, 2735 NULL, 2736 NULL, 2737 NULL, 2738 NULL, 2739 /*29*/ MatSetUp_MPI_Hash, 2740 NULL, 2741 NULL, 2742 MatGetDiagonalBlock_MPIAIJ, 2743 NULL, 2744 /*34*/ MatDuplicate_MPIAIJ, 2745 NULL, 2746 NULL, 2747 NULL, 2748 NULL, 2749 /*39*/ MatAXPY_MPIAIJ, 2750 MatCreateSubMatrices_MPIAIJ, 2751 MatIncreaseOverlap_MPIAIJ, 2752 MatGetValues_MPIAIJ, 2753 MatCopy_MPIAIJ, 2754 /*44*/ MatGetRowMax_MPIAIJ, 2755 MatScale_MPIAIJ, 2756 MatShift_MPIAIJ, 2757 MatDiagonalSet_MPIAIJ, 2758 MatZeroRowsColumns_MPIAIJ, 2759 /*49*/ MatSetRandom_MPIAIJ, 2760 MatGetRowIJ_MPIAIJ, 2761 MatRestoreRowIJ_MPIAIJ, 2762 NULL, 2763 NULL, 2764 /*54*/ MatFDColoringCreate_MPIXAIJ, 2765 NULL, 2766 MatSetUnfactored_MPIAIJ, 2767 MatPermute_MPIAIJ, 2768 NULL, 2769 /*59*/ MatCreateSubMatrix_MPIAIJ, 2770 MatDestroy_MPIAIJ, 2771 MatView_MPIAIJ, 2772 NULL, 2773 NULL, 2774 /*64*/ NULL, 2775 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 NULL, 2779 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2780 MatGetRowMinAbs_MPIAIJ, 2781 NULL, 2782 NULL, 2783 NULL, 2784 NULL, 2785 /*75*/ MatFDColoringApply_AIJ, 2786 MatSetFromOptions_MPIAIJ, 2787 NULL, 2788 NULL, 2789 MatFindZeroDiagonals_MPIAIJ, 2790 /*80*/ NULL, 2791 NULL, 2792 NULL, 2793 /*83*/ MatLoad_MPIAIJ, 2794 MatIsSymmetric_MPIAIJ, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*89*/ NULL, 2800 NULL, 2801 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2802 NULL, 2803 NULL, 2804 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2805 NULL, 
2806 NULL, 2807 NULL, 2808 MatBindToCPU_MPIAIJ, 2809 /*99*/ MatProductSetFromOptions_MPIAIJ, 2810 NULL, 2811 NULL, 2812 MatConjugate_MPIAIJ, 2813 NULL, 2814 /*104*/ MatSetValuesRow_MPIAIJ, 2815 MatRealPart_MPIAIJ, 2816 MatImaginaryPart_MPIAIJ, 2817 NULL, 2818 NULL, 2819 /*109*/ NULL, 2820 NULL, 2821 MatGetRowMin_MPIAIJ, 2822 NULL, 2823 MatMissingDiagonal_MPIAIJ, 2824 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2825 NULL, 2826 MatGetGhosts_MPIAIJ, 2827 NULL, 2828 NULL, 2829 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2830 NULL, 2831 NULL, 2832 NULL, 2833 MatGetMultiProcBlock_MPIAIJ, 2834 /*124*/ MatFindNonzeroRows_MPIAIJ, 2835 MatGetColumnReductions_MPIAIJ, 2836 MatInvertBlockDiagonal_MPIAIJ, 2837 MatInvertVariableBlockDiagonal_MPIAIJ, 2838 MatCreateSubMatricesMPI_MPIAIJ, 2839 /*129*/ NULL, 2840 NULL, 2841 NULL, 2842 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2843 NULL, 2844 /*134*/ NULL, 2845 NULL, 2846 NULL, 2847 NULL, 2848 NULL, 2849 /*139*/ MatSetBlockSizes_MPIAIJ, 2850 NULL, 2851 NULL, 2852 MatFDColoringSetUp_MPIXAIJ, 2853 MatFindOffBlockDiagonalEntries_MPIAIJ, 2854 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2855 /*145*/ NULL, 2856 NULL, 2857 NULL, 2858 MatCreateGraph_Simple_AIJ, 2859 NULL, 2860 /*150*/ NULL, 2861 MatEliminateZeros_MPIAIJ}; 2862 2863 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2864 { 2865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2866 2867 PetscFunctionBegin; 2868 PetscCall(MatStoreValues(aij->A)); 2869 PetscCall(MatStoreValues(aij->B)); 2870 PetscFunctionReturn(PETSC_SUCCESS); 2871 } 2872 2873 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2874 { 2875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2876 2877 PetscFunctionBegin; 2878 PetscCall(MatRetrieveValues(aij->A)); 2879 PetscCall(MatRetrieveValues(aij->B)); 2880 PetscFunctionReturn(PETSC_SUCCESS); 2881 } 2882 2883 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2884 { 2885 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2886 PetscMPIInt size; 2887 2888 PetscFunctionBegin; 2889 if (B->hash_active) { 2890 B->ops[0] = b->cops; 2891 B->hash_active = PETSC_FALSE; 2892 } 2893 PetscCall(PetscLayoutSetUp(B->rmap)); 2894 PetscCall(PetscLayoutSetUp(B->cmap)); 2895 2896 #if defined(PETSC_USE_CTABLE) 2897 PetscCall(PetscHMapIDestroy(&b->colmap)); 2898 #else 2899 PetscCall(PetscFree(b->colmap)); 2900 #endif 2901 PetscCall(PetscFree(b->garray)); 2902 PetscCall(VecDestroy(&b->lvec)); 2903 PetscCall(VecScatterDestroy(&b->Mvctx)); 2904 2905 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2906 PetscCall(MatDestroy(&b->B)); 2907 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2908 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2909 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2910 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2911 2912 PetscCall(MatDestroy(&b->A)); 2913 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2914 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2915 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2916 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2917 2918 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2919 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2920 B->preallocated = PETSC_TRUE; 2921 B->was_assembled = PETSC_FALSE; 2922 B->assembled = PETSC_FALSE; 2923 PetscFunctionReturn(PETSC_SUCCESS); 2924 } 2925 2926 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2927 { 2928 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2929 2930 PetscFunctionBegin; 2931 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2932 PetscCall(PetscLayoutSetUp(B->rmap)); 2933 PetscCall(PetscLayoutSetUp(B->cmap)); 2934 2935 #if defined(PETSC_USE_CTABLE) 2936 PetscCall(PetscHMapIDestroy(&b->colmap)); 2937 #else 2938 PetscCall(PetscFree(b->colmap)); 2939 #endif 2940 PetscCall(PetscFree(b->garray)); 2941 PetscCall(VecDestroy(&b->lvec)); 2942 PetscCall(VecScatterDestroy(&b->Mvctx)); 2943 2944 PetscCall(MatResetPreallocation(b->A)); 2945 PetscCall(MatResetPreallocation(b->B)); 2946 B->preallocated = PETSC_TRUE; 2947 B->was_assembled = PETSC_FALSE; 2948 B->assembled = PETSC_FALSE; 2949 PetscFunctionReturn(PETSC_SUCCESS); 2950 } 2951 2952 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2953 { 2954 Mat mat; 2955 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2956 2957 PetscFunctionBegin; 2958 *newmat = NULL; 2959 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2960 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2961 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2962 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2963 a = (Mat_MPIAIJ *)mat->data; 2964 2965 mat->factortype = matin->factortype; 2966 mat->assembled = matin->assembled; 2967 mat->insertmode = NOT_SET_VALUES; 2968 2969 a->size = oldmat->size; 2970 a->rank = oldmat->rank; 2971 a->donotstash = oldmat->donotstash; 2972 a->roworiented = oldmat->roworiented; 2973 a->rowindices = NULL; 2974 a->rowvalues = NULL; 2975 a->getrowactive = PETSC_FALSE; 2976 2977 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2978 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2979 if (matin->hash_active) { 2980 PetscCall(MatSetUp(mat)); 2981 } else { 2982 mat->preallocated = matin->preallocated; 2983 if (oldmat->colmap) { 2984 #if defined(PETSC_USE_CTABLE) 2985 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2986 #else 2987 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2988 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2989 #endif 2990 } else a->colmap = NULL; 2991 if (oldmat->garray) { 2992 PetscInt len; 2993 len = oldmat->B->cmap->n; 2994 PetscCall(PetscMalloc1(len + 1, &a->garray)); 2995 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2996 } else a->garray = NULL; 2997 2998 /* It may happen MatDuplicate is called with a non-assembled matrix 2999 In fact, MatDuplicate only requires the matrix to be preallocated 3000 This may happen inside a DMCreateMatrix_Shell */ 3001 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3002 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3003 
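/* duplicate the two sequential blocks; cpvalues controls whether the numerical values are copied along with the nonzero structure */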
PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3004 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3005 } 3006 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3007 *newmat = mat; 3008 PetscFunctionReturn(PETSC_SUCCESS); 3009 } 3010 3011 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3012 { 3013 PetscBool isbinary, ishdf5; 3014 3015 PetscFunctionBegin; 3016 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3017 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3018 /* force binary viewer to load .info file if it has not yet done so */ 3019 PetscCall(PetscViewerSetUp(viewer)); 3020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3021 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3022 if (isbinary) { 3023 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3024 } else if (ishdf5) { 3025 #if defined(PETSC_HAVE_HDF5) 3026 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3027 #else 3028 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3029 #endif 3030 } else { 3031 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3032 } 3033 PetscFunctionReturn(PETSC_SUCCESS); 3034 } 3035 3036 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3037 { 3038 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3039 PetscInt *rowidxs, *colidxs; 3040 PetscScalar *matvals; 3041 3042 PetscFunctionBegin; 3043 PetscCall(PetscViewerSetUp(viewer)); 3044 3045 /* read in matrix header */ 3046 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3047 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3048 M = header[1]; 3049 N = header[2]; 3050 nz = header[3]; 3051 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3052 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3053 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3054 3055 /* set block sizes from the viewer's .info file */ 3056 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3057 /* set global sizes if not set already */ 3058 if (mat->rmap->N < 0) mat->rmap->N = M; 3059 if (mat->cmap->N < 0) mat->cmap->N = N; 3060 PetscCall(PetscLayoutSetUp(mat->rmap)); 3061 PetscCall(PetscLayoutSetUp(mat->cmap)); 3062 3063 /* check if the matrix sizes are correct */ 3064 PetscCall(MatGetSize(mat, &rows, &cols)); 3065 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3066 3067 /* read in row lengths and build row indices */ 3068 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3069 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3070 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3071 rowidxs[0] = 0; 3072 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3073 if (nz != PETSC_MAX_INT) { 3074 
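    /* the file header records a global nonzero count; check that it matches the sum of the local row lengths just read */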
PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3075 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3076 } 3077 3078 /* read in column indices and matrix values */ 3079 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3080 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3081 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3082 /* store matrix indices and values */ 3083 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3084 PetscCall(PetscFree(rowidxs)); 3085 PetscCall(PetscFree2(colidxs, matvals)); 3086 PetscFunctionReturn(PETSC_SUCCESS); 3087 } 3088 3089 /* Not scalable because of ISAllGather() unless getting all columns. */ 3090 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3091 { 3092 IS iscol_local; 3093 PetscBool isstride; 3094 PetscMPIInt lisstride = 0, gisstride; 3095 3096 PetscFunctionBegin; 3097 /* check if we are grabbing all columns*/ 3098 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3099 3100 if (isstride) { 3101 PetscInt start, len, mstart, mlen; 3102 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3103 PetscCall(ISGetLocalSize(iscol, &len)); 3104 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3105 if (mstart == start && mlen - mstart == len) lisstride = 1; 3106 } 3107 3108 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3109 if (gisstride) { 3110 PetscInt N; 3111 PetscCall(MatGetSize(mat, NULL, &N)); 3112 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3113 PetscCall(ISSetIdentity(iscol_local)); 3114 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3115 } else { 3116 PetscInt cbs; 3117 PetscCall(ISGetBlockSize(iscol, &cbs)); 3118 PetscCall(ISAllGather(iscol, &iscol_local)); 3119 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3120 } 3121 3122 *isseq = iscol_local; 3123 PetscFunctionReturn(PETSC_SUCCESS); 3124 } 3125 3126 /* 3127 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3128 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3129 3130 Input Parameters: 3131 + mat - matrix 3132 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3133 i.e., mat->rstart <= isrow[i] < mat->rend 3134 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3135 i.e., mat->cstart <= iscol[i] < mat->cend 3136 3137 Output Parameters: 3138 + isrow_d - sequential row index set for retrieving mat->A 3139 . iscol_d - sequential column index set for retrieving mat->A 3140 . 
iscol_o - sequential column index set for retrieving mat->B 3141 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3142 */ 3143 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3144 { 3145 Vec x, cmap; 3146 const PetscInt *is_idx; 3147 PetscScalar *xarray, *cmaparray; 3148 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3150 Mat B = a->B; 3151 Vec lvec = a->lvec, lcmap; 3152 PetscInt i, cstart, cend, Bn = B->cmap->N; 3153 MPI_Comm comm; 3154 VecScatter Mvctx = a->Mvctx; 3155 3156 PetscFunctionBegin; 3157 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3158 PetscCall(ISGetLocalSize(iscol, &ncols)); 3159 3160 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3161 PetscCall(MatCreateVecs(mat, &x, NULL)); 3162 PetscCall(VecSet(x, -1.0)); 3163 PetscCall(VecDuplicate(x, &cmap)); 3164 PetscCall(VecSet(cmap, -1.0)); 3165 3166 /* Get start indices */ 3167 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3168 isstart -= ncols; 3169 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3170 3171 PetscCall(ISGetIndices(iscol, &is_idx)); 3172 PetscCall(VecGetArray(x, &xarray)); 3173 PetscCall(VecGetArray(cmap, &cmaparray)); 3174 PetscCall(PetscMalloc1(ncols, &idx)); 3175 for (i = 0; i < ncols; i++) { 3176 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3177 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3178 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3179 } 3180 PetscCall(VecRestoreArray(x, &xarray)); 3181 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3182 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3183 3184 /* Get iscol_d */ 3185 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3186 PetscCall(ISGetBlockSize(iscol, &i)); 3187 PetscCall(ISSetBlockSize(*iscol_d, i)); 3188 3189 /* Get isrow_d */ 3190 PetscCall(ISGetLocalSize(isrow, &m)); 3191 rstart = mat->rmap->rstart; 3192 PetscCall(PetscMalloc1(m, &idx)); 3193 PetscCall(ISGetIndices(isrow, &is_idx)); 3194 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3195 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3196 3197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3198 PetscCall(ISGetBlockSize(isrow, &i)); 3199 PetscCall(ISSetBlockSize(*isrow_d, i)); 3200 3201 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3202 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3203 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3204 3205 PetscCall(VecDuplicate(lvec, &lcmap)); 3206 3207 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3208 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3209 3210 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3211 /* off-process column indices */ 3212 count = 0; 3213 PetscCall(PetscMalloc1(Bn, &idx)); 3214 PetscCall(PetscMalloc1(Bn, &cmap1)); 3215 3216 PetscCall(VecGetArray(lvec, &xarray)); 3217 PetscCall(VecGetArray(lcmap, &cmaparray)); 3218 for (i = 0; i < Bn; i++) { 3219 if (PetscRealPart(xarray[i]) > -1.0) { 3220 idx[count] = i; /* local column index in off-diagonal part B */ 3221 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3222 
count++; 3223 } 3224 } 3225 PetscCall(VecRestoreArray(lvec, &xarray)); 3226 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3227 3228 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3229 /* cannot ensure iscol_o has same blocksize as iscol! */ 3230 3231 PetscCall(PetscFree(idx)); 3232 *garray = cmap1; 3233 3234 PetscCall(VecDestroy(&x)); 3235 PetscCall(VecDestroy(&cmap)); 3236 PetscCall(VecDestroy(&lcmap)); 3237 PetscFunctionReturn(PETSC_SUCCESS); 3238 } 3239 3240 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3241 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3242 { 3243 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3244 Mat M = NULL; 3245 MPI_Comm comm; 3246 IS iscol_d, isrow_d, iscol_o; 3247 Mat Asub = NULL, Bsub = NULL; 3248 PetscInt n; 3249 3250 PetscFunctionBegin; 3251 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3252 3253 if (call == MAT_REUSE_MATRIX) { 3254 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3255 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3256 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3257 3258 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3259 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3260 3261 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3262 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3263 3264 /* Update diagonal and off-diagonal portions of submat */ 3265 asub = (Mat_MPIAIJ *)(*submat)->data; 3266 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3267 PetscCall(ISGetLocalSize(iscol_o, &n)); 3268 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3269 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3270 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3271 3272 } else { /* call == MAT_INITIAL_MATRIX) */ 3273 const PetscInt *garray; 3274 PetscInt BsubN; 3275 3276 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3277 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3278 3279 /* Create local submatrices Asub and Bsub */ 3280 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3281 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3282 3283 /* Create submatrix M */ 3284 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3285 3286 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3287 asub = (Mat_MPIAIJ *)M->data; 3288 3289 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3290 n = asub->B->cmap->N; 3291 if (BsubN > n) { 3292 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3293 const PetscInt *idx; 3294 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3295 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3296 3297 PetscCall(PetscMalloc1(n, &idx_new)); 3298 j = 0; 3299 PetscCall(ISGetIndices(iscol_o, &idx)); 3300 for (i = 0; i < n; i++) { 3301 if (j >= BsubN) break; 3302 while (subgarray[i] > garray[j]) j++; 3303 3304 if (subgarray[i] == garray[j]) { 3305 idx_new[i] = idx[j++]; 3306 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3307 } 3308 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3309 3310 PetscCall(ISDestroy(&iscol_o)); 3311 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3312 3313 } else if (BsubN < n) { 3314 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3315 } 3316 3317 PetscCall(PetscFree(garray)); 3318 *submat = M; 3319 3320 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3321 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3322 PetscCall(ISDestroy(&isrow_d)); 3323 3324 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3325 PetscCall(ISDestroy(&iscol_d)); 3326 3327 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3328 PetscCall(ISDestroy(&iscol_o)); 3329 } 3330 PetscFunctionReturn(PETSC_SUCCESS); 3331 } 3332 3333 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3334 { 3335 IS iscol_local = NULL, isrow_d; 3336 PetscInt csize; 3337 PetscInt n, i, j, start, end; 3338 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3339 MPI_Comm comm; 3340 3341 PetscFunctionBegin; 3342 /* If isrow has same processor distribution as mat, 3343 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3344 if (call == MAT_REUSE_MATRIX) { 3345 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3346 if (isrow_d) { 3347 sameRowDist = PETSC_TRUE; 3348 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3349 } else { 3350 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3351 if (iscol_local) { 3352 sameRowDist = PETSC_TRUE; 3353 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3354 } 3355 } 3356 } else { 3357 /* Check if isrow has same processor distribution as mat */ 3358 sameDist[0] = PETSC_FALSE; 3359 
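    /* a rank that owns no rows of isrow trivially matches; otherwise all of its indices must lie in this rank's row ownership range */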
PetscCall(ISGetLocalSize(isrow, &n)); 3360 if (!n) { 3361 sameDist[0] = PETSC_TRUE; 3362 } else { 3363 PetscCall(ISGetMinMax(isrow, &i, &j)); 3364 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3365 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3366 } 3367 3368 /* Check if iscol has same processor distribution as mat */ 3369 sameDist[1] = PETSC_FALSE; 3370 PetscCall(ISGetLocalSize(iscol, &n)); 3371 if (!n) { 3372 sameDist[1] = PETSC_TRUE; 3373 } else { 3374 PetscCall(ISGetMinMax(iscol, &i, &j)); 3375 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3376 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3377 } 3378 3379 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3380 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3381 sameRowDist = tsameDist[0]; 3382 } 3383 3384 if (sameRowDist) { 3385 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3386 /* isrow and iscol have same processor distribution as mat */ 3387 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3388 PetscFunctionReturn(PETSC_SUCCESS); 3389 } else { /* sameRowDist */ 3390 /* isrow has same processor distribution as mat */ 3391 if (call == MAT_INITIAL_MATRIX) { 3392 PetscBool sorted; 3393 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3394 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3395 PetscCall(ISGetSize(iscol, &i)); 3396 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3397 3398 PetscCall(ISSorted(iscol_local, &sorted)); 3399 if (sorted) { 3400 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3401 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3402 PetscFunctionReturn(PETSC_SUCCESS); 3403 } 3404 } else { /* call == MAT_REUSE_MATRIX */ 3405 IS iscol_sub; 3406 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3407 if (iscol_sub) { 3408 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } 3411 } 3412 } 3413 } 3414 3415 /* General case: iscol -> iscol_local which has global size of iscol */ 3416 if (call == MAT_REUSE_MATRIX) { 3417 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3418 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3419 } else { 3420 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3421 } 3422 3423 PetscCall(ISGetLocalSize(iscol, &csize)); 3424 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3425 3426 if (call == MAT_INITIAL_MATRIX) { 3427 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3428 PetscCall(ISDestroy(&iscol_local)); 3429 } 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 3433 /*@C 3434 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3435 and "off-diagonal" part of the matrix in CSR format. 3436 3437 Collective 3438 3439 Input Parameters: 3440 + comm - MPI communicator 3441 . A - "diagonal" portion of matrix 3442 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3443 - garray - global index of `B` columns 3444 3445 Output Parameter: 3446 . mat - the matrix, with input `A` as its local diagonal matrix 3447 3448 Level: advanced 3449 3450 Notes: 3451 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3452 3453 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3454 3455 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3456 @*/ 3457 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3458 { 3459 Mat_MPIAIJ *maij; 3460 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3461 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3462 const PetscScalar *oa; 3463 Mat Bnew; 3464 PetscInt m, n, N; 3465 MatType mpi_mat_type; 3466 3467 PetscFunctionBegin; 3468 PetscCall(MatCreate(comm, mat)); 3469 PetscCall(MatGetSize(A, &m, &n)); 3470 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3471 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3472 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3473 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3474 3475 /* Get global columns of mat */ 3476 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3477 3478 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3479 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3480 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3481 PetscCall(MatSetType(*mat, mpi_mat_type)); 3482 3483 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3484 maij = (Mat_MPIAIJ *)(*mat)->data; 3485 3486 (*mat)->preallocated = PETSC_TRUE; 3487 3488 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3489 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3490 3491 /* Set A as diagonal portion of *mat */ 3492 maij->A = A; 3493 3494 nz = oi[m]; 3495 for (i = 0; i < nz; i++) { 3496 col = oj[i]; 3497 oj[i] = garray[col]; 3498 } 3499 3500 /* Set Bnew as off-diagonal portion of *mat */ 3501 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3502 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3503 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3504 bnew = (Mat_SeqAIJ *)Bnew->data; 3505 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3506 maij->B = Bnew; 3507 3508 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3509 3510 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3511 b->free_a = PETSC_FALSE; 3512 b->free_ij = PETSC_FALSE; 3513 PetscCall(MatDestroy(&B)); 3514 3515 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3516 bnew->free_a = PETSC_TRUE; 3517 bnew->free_ij = PETSC_TRUE; 3518 3519 /* condense columns of maij->B */ 3520 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3521 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3522 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3523 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3524 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3525 PetscFunctionReturn(PETSC_SUCCESS); 3526 } 3527 3528 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3529 3530 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3531 { 3532 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3533 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3534 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3535 Mat M, Msub, B = a->B; 3536 MatScalar *aa; 3537 Mat_SeqAIJ *aij; 3538 PetscInt *garray = a->garray, *colsub, Ncols; 3539 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3540 IS iscol_sub, iscmap; 3541 const PetscInt *is_idx, *cmap; 3542 PetscBool allcolumns = PETSC_FALSE; 3543 MPI_Comm comm; 3544 3545 PetscFunctionBegin; 3546 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3547 if (call == MAT_REUSE_MATRIX) { 3548 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3549 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3550 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3551 3552 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3553 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3554 3555 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3556 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3557 3558 
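    /* reuse path: refill the cached sequential submatrix Msub in place using the saved sub-column index set */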
PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3559 3560 } else { /* call == MAT_INITIAL_MATRIX) */ 3561 PetscBool flg; 3562 3563 PetscCall(ISGetLocalSize(iscol, &n)); 3564 PetscCall(ISGetSize(iscol, &Ncols)); 3565 3566 /* (1) iscol -> nonscalable iscol_local */ 3567 /* Check for special case: each processor gets entire matrix columns */ 3568 PetscCall(ISIdentity(iscol_local, &flg)); 3569 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3570 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3571 if (allcolumns) { 3572 iscol_sub = iscol_local; 3573 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3574 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3575 3576 } else { 3577 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3578 PetscInt *idx, *cmap1, k; 3579 PetscCall(PetscMalloc1(Ncols, &idx)); 3580 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3581 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3582 count = 0; 3583 k = 0; 3584 for (i = 0; i < Ncols; i++) { 3585 j = is_idx[i]; 3586 if (j >= cstart && j < cend) { 3587 /* diagonal part of mat */ 3588 idx[count] = j; 3589 cmap1[count++] = i; /* column index in submat */ 3590 } else if (Bn) { 3591 /* off-diagonal part of mat */ 3592 if (j == garray[k]) { 3593 idx[count] = j; 3594 cmap1[count++] = i; /* column index in submat */ 3595 } else if (j > garray[k]) { 3596 while (j > garray[k] && k < Bn - 1) k++; 3597 if (j == garray[k]) { 3598 idx[count] = j; 3599 cmap1[count++] = i; /* column index in submat */ 3600 } 3601 } 3602 } 3603 } 3604 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3605 3606 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3607 PetscCall(ISGetBlockSize(iscol, &cbs)); 3608 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3609 3610 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3611 } 3612 3613 /* (3) Create sequential Msub */ 3614 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3615 } 3616 3617 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3618 aij = (Mat_SeqAIJ *)(Msub)->data; 3619 ii = aij->i; 3620 PetscCall(ISGetIndices(iscmap, &cmap)); 3621 3622 /* 3623 m - number of local rows 3624 Ncols - number of columns (same on all processors) 3625 rstart - first row in new global matrix generated 3626 */ 3627 PetscCall(MatGetSize(Msub, &m, NULL)); 3628 3629 if (call == MAT_INITIAL_MATRIX) { 3630 /* (4) Create parallel newmat */ 3631 PetscMPIInt rank, size; 3632 PetscInt csize; 3633 3634 PetscCallMPI(MPI_Comm_size(comm, &size)); 3635 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3636 3637 /* 3638 Determine the number of non-zeros in the diagonal and off-diagonal 3639 portions of the matrix in order to do correct preallocation 3640 */ 3641 3642 /* first get start and end of "diagonal" columns */ 3643 PetscCall(ISGetLocalSize(iscol, &csize)); 3644 if (csize == PETSC_DECIDE) { 3645 PetscCall(ISGetSize(isrow, &mglobal)); 3646 if (mglobal == Ncols) { /* square matrix */ 3647 nlocal = m; 3648 } else { 3649 nlocal = Ncols / size + ((Ncols % size) > rank); 3650 } 3651 } else { 3652 nlocal = csize; 3653 } 3654 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3655 rstart = rend - nlocal; 3656 PetscCheck(rank != size - 1 
|| rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3657 3658 /* next, compute all the lengths */ 3659 jj = aij->j; 3660 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3661 olens = dlens + m; 3662 for (i = 0; i < m; i++) { 3663 jend = ii[i + 1] - ii[i]; 3664 olen = 0; 3665 dlen = 0; 3666 for (j = 0; j < jend; j++) { 3667 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3668 else dlen++; 3669 jj++; 3670 } 3671 olens[i] = olen; 3672 dlens[i] = dlen; 3673 } 3674 3675 PetscCall(ISGetBlockSize(isrow, &bs)); 3676 PetscCall(ISGetBlockSize(iscol, &cbs)); 3677 3678 PetscCall(MatCreate(comm, &M)); 3679 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3680 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3681 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3682 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3683 PetscCall(PetscFree(dlens)); 3684 3685 } else { /* call == MAT_REUSE_MATRIX */ 3686 M = *newmat; 3687 PetscCall(MatGetLocalSize(M, &i, NULL)); 3688 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3689 PetscCall(MatZeroEntries(M)); 3690 /* 3691 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3692 rather than the slower MatSetValues(). 3693 */ 3694 M->was_assembled = PETSC_TRUE; 3695 M->assembled = PETSC_FALSE; 3696 } 3697 3698 /* (5) Set values of Msub to *newmat */ 3699 PetscCall(PetscMalloc1(count, &colsub)); 3700 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3701 3702 jj = aij->j; 3703 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3704 for (i = 0; i < m; i++) { 3705 row = rstart + i; 3706 nz = ii[i + 1] - ii[i]; 3707 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3708 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3709 jj += nz; 3710 aa += nz; 3711 } 3712 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3713 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3714 3715 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3716 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3717 3718 PetscCall(PetscFree(colsub)); 3719 3720 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3721 if (call == MAT_INITIAL_MATRIX) { 3722 *newmat = M; 3723 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3724 PetscCall(MatDestroy(&Msub)); 3725 3726 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3727 PetscCall(ISDestroy(&iscol_sub)); 3728 3729 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3730 PetscCall(ISDestroy(&iscmap)); 3731 3732 if (iscol_local) { 3733 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3734 PetscCall(ISDestroy(&iscol_local)); 3735 } 3736 } 3737 PetscFunctionReturn(PETSC_SUCCESS); 3738 } 3739 3740 /* 3741 Not great since it makes two copies of the submatrix, first an SeqAIJ 3742 in local and then by concatenating the local matrices the end result. 3743 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3744 3745 This requires a sequential iscol with all indices. 
3746 */ 3747 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3748 { 3749 PetscMPIInt rank, size; 3750 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3751 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3752 Mat M, Mreuse; 3753 MatScalar *aa, *vwork; 3754 MPI_Comm comm; 3755 Mat_SeqAIJ *aij; 3756 PetscBool colflag, allcolumns = PETSC_FALSE; 3757 3758 PetscFunctionBegin; 3759 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3760 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3761 PetscCallMPI(MPI_Comm_size(comm, &size)); 3762 3763 /* Check for special case: each processor gets entire matrix columns */ 3764 PetscCall(ISIdentity(iscol, &colflag)); 3765 PetscCall(ISGetLocalSize(iscol, &n)); 3766 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3767 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3768 3769 if (call == MAT_REUSE_MATRIX) { 3770 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3771 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3772 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3773 } else { 3774 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3775 } 3776 3777 /* 3778 m - number of local rows 3779 n - number of columns (same on all processors) 3780 rstart - first row in new global matrix generated 3781 */ 3782 PetscCall(MatGetSize(Mreuse, &m, &n)); 3783 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3784 if (call == MAT_INITIAL_MATRIX) { 3785 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3786 ii = aij->i; 3787 jj = aij->j; 3788 3789 /* 3790 Determine the number of non-zeros in the diagonal and off-diagonal 3791 portions of the matrix in order to do correct preallocation 3792 */ 3793 3794 /* first get start and end of "diagonal" columns */ 3795 if (csize == PETSC_DECIDE) { 3796 PetscCall(ISGetSize(isrow, &mglobal)); 3797 if (mglobal == n) { /* square matrix */ 3798 nlocal = m; 3799 } else { 3800 nlocal = n / size + ((n % size) > rank); 3801 } 3802 } else { 3803 nlocal = csize; 3804 } 3805 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3806 rstart = rend - nlocal; 3807 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3808 3809 /* next, compute all the lengths */ 3810 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3811 olens = dlens + m; 3812 for (i = 0; i < m; i++) { 3813 jend = ii[i + 1] - ii[i]; 3814 olen = 0; 3815 dlen = 0; 3816 for (j = 0; j < jend; j++) { 3817 if (*jj < rstart || *jj >= rend) olen++; 3818 else dlen++; 3819 jj++; 3820 } 3821 olens[i] = olen; 3822 dlens[i] = dlen; 3823 } 3824 PetscCall(MatCreate(comm, &M)); 3825 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3826 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3827 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3828 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3829 PetscCall(PetscFree(dlens)); 3830 } else { 3831 PetscInt ml, nl; 3832 3833 M = *newmat; 3834 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3835 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3836 PetscCall(MatZeroEntries(M)); 3837 /* 3838 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3839 rather than the slower MatSetValues(). 3840 */ 3841 M->was_assembled = PETSC_TRUE; 3842 M->assembled = PETSC_FALSE; 3843 } 3844 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3845 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3846 ii = aij->i; 3847 jj = aij->j; 3848 3849 /* trigger copy to CPU if needed */ 3850 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3851 for (i = 0; i < m; i++) { 3852 row = rstart + i; 3853 nz = ii[i + 1] - ii[i]; 3854 cwork = jj; 3855 jj += nz; 3856 vwork = aa; 3857 aa += nz; 3858 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3859 } 3860 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3861 3862 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3863 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3864 *newmat = M; 3865 3866 /* save submatrix used in processor for next request */ 3867 if (call == MAT_INITIAL_MATRIX) { 3868 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3869 PetscCall(MatDestroy(&Mreuse)); 3870 } 3871 PetscFunctionReturn(PETSC_SUCCESS); 3872 } 3873 3874 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3875 { 3876 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3877 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3878 const PetscInt *JJ; 3879 PetscBool nooffprocentries; 3880 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3881 3882 PetscFunctionBegin; 3883 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3884 3885 PetscCall(PetscLayoutSetUp(B->rmap)); 3886 PetscCall(PetscLayoutSetUp(B->cmap)); 3887 m = B->rmap->n; 3888 cstart = B->cmap->rstart; 3889 cend = B->cmap->rend; 3890 rstart = B->rmap->rstart; 3891 3892 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3893 3894 if (PetscDefined(USE_DEBUG)) { 3895 for (i = 0; i < m; i++) { 3896 nnz = Ii[i + 1] - Ii[i]; 3897 JJ = J ? J + Ii[i] : NULL; 3898 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3899 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3900 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3901 } 3902 } 3903 3904 for (i = 0; i < m; i++) { 3905 nnz = Ii[i + 1] - Ii[i]; 3906 JJ = J ? J + Ii[i] : NULL; 3907 nnz_max = PetscMax(nnz_max, nnz); 3908 d = 0; 3909 for (j = 0; j < nnz; j++) { 3910 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3911 } 3912 d_nnz[i] = d; 3913 o_nnz[i] = nnz - d; 3914 } 3915 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3916 PetscCall(PetscFree2(d_nnz, o_nnz)); 3917 3918 for (i = 0; i < m; i++) { 3919 ii = i + rstart; 3920 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J ? J + Ii[i] : NULL, v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3921 } 3922 nooffprocentries = B->nooffprocentries; 3923 B->nooffprocentries = PETSC_TRUE; 3924 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3925 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3926 B->nooffprocentries = nooffprocentries; 3927 3928 /* count number of entries below block diagonal */ 3929 PetscCall(PetscFree(Aij->ld)); 3930 PetscCall(PetscCalloc1(m, &ld)); 3931 Aij->ld = ld; 3932 for (i = 0; i < m; i++) { 3933 nnz = Ii[i + 1] - Ii[i]; 3934 j = 0; 3935 while (j < nnz && J[j] < cstart) j++; 3936 ld[i] = j; 3937 if (J) J += nnz; 3938 } 3939 3940 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3941 PetscFunctionReturn(PETSC_SUCCESS); 3942 } 3943 3944 /*@ 3945 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3946 (the default parallel PETSc format). 3947 3948 Collective 3949 3950 Input Parameters: 3951 + B - the matrix 3952 . i - the indices into j for the start of each local row (starts with zero) 3953 . j - the column indices for each local row (starts with zero) 3954 - v - optional values in the matrix 3955 3956 Level: developer 3957 3958 Notes: 3959 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3960 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3961 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3962 3963 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3964 3965 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3966 3967 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3968 3969 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3970 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3971 3972 The format which is used for the sparse matrix input, is equivalent to a 3973 row-major ordering.. i.e for the following matrix, the input data expected is 3974 as shown 3975 .vb 3976 1 0 0 3977 2 0 3 P0 3978 ------- 3979 4 5 6 P1 3980 3981 Process0 [P0] rows_owned=[0,1] 3982 i = {0,1,3} [size = nrow+1 = 2+1] 3983 j = {0,0,2} [size = 3] 3984 v = {1,2,3} [size = 3] 3985 3986 Process1 [P1] rows_owned=[2] 3987 i = {0,3} [size = nrow+1 = 1+1] 3988 j = {0,1,2} [size = 3] 3989 v = {4,5,6} [size = 3] 3990 .ve 3991 3992 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 3993 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 3994 @*/ 3995 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3996 { 3997 PetscFunctionBegin; 3998 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 3999 PetscFunctionReturn(PETSC_SUCCESS); 4000 } 4001 4002 /*@C 4003 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4004 (the default parallel PETSc format). For good matrix assembly performance 4005 the user should preallocate the matrix storage by setting the parameters 4006 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e., 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 nonzero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
          -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
          -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.
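  For instance, as a minimal sketch (assuming `B` has already been created as a `MATMPIAIJ` with the sizes of this
  example, and with error checking omitted), proc1 could preallocate with its constant per-row counts by passing
  `NULL` for the array arguments
.vb
     MatMPIAIJSetPreallocation(B, 3, NULL, 2, NULL);
.ve
  The `d_nz` and `o_nz` values are local to each process, so different ranks may pass different counts in the same call.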

  When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all of the above values, i.e., 34, and
  hence the preallocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
      calculated if N is given). For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices
- a - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  Once you have created the matrix, you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format used for the sparse matrix input is equivalent to a
  row-major ordering, i.e., for the following matrix, the expected input data is
  as shown
.vb
        1 0 0
        2 0 3   P0
       -------
        4 5 6   P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format. Only the numerical values are updated; the other arrays must be identical
  to what was passed to `MatCreateMPIAIJWithArrays()`.

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m - number of local rows (cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
      calculated if N is given). For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
.
N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4225 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4226 . J - column indices 4227 - v - matrix values 4228 4229 Level: deprecated 4230 4231 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4232 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4233 @*/ 4234 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4235 { 4236 PetscInt nnz, i; 4237 PetscBool nooffprocentries; 4238 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4239 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4240 PetscScalar *ad, *ao; 4241 PetscInt ldi, Iii, md; 4242 const PetscInt *Adi = Ad->i; 4243 PetscInt *ld = Aij->ld; 4244 4245 PetscFunctionBegin; 4246 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4247 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4248 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4249 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4250 4251 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4252 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4253 4254 for (i = 0; i < m; i++) { 4255 if (PetscDefined(USE_DEBUG)) { 4256 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4257 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4258 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4259 } 4260 } 4261 nnz = Ii[i + 1] - Ii[i]; 4262 Iii = Ii[i]; 4263 ldi = ld[i]; 4264 md = Adi[i + 1] - Adi[i]; 4265 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4266 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4267 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4268 ad += md; 4269 ao += nnz - md; 4270 } 4271 nooffprocentries = mat->nooffprocentries; 4272 mat->nooffprocentries = PETSC_TRUE; 4273 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4274 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4275 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4276 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4277 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4278 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4279 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4280 mat->nooffprocentries = nooffprocentries; 4281 PetscFunctionReturn(PETSC_SUCCESS); 4282 } 4283 4284 /*@ 4285 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4286 4287 Collective 4288 4289 Input Parameters: 4290 + mat - the matrix 4291 - v - matrix values, stored by row 4292 4293 Level: intermediate 4294 4295 Notes: 4296 The matrix must have been 
obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4297 4298 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4299 4300 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4301 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4302 @*/ 4303 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4304 { 4305 PetscInt nnz, i, m; 4306 PetscBool nooffprocentries; 4307 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4308 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4309 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4310 PetscScalar *ad, *ao; 4311 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4312 PetscInt ldi, Iii, md; 4313 PetscInt *ld = Aij->ld; 4314 4315 PetscFunctionBegin; 4316 m = mat->rmap->n; 4317 4318 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4319 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4320 Iii = 0; 4321 for (i = 0; i < m; i++) { 4322 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4323 ldi = ld[i]; 4324 md = Adi[i + 1] - Adi[i]; 4325 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4326 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4327 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4328 ad += md; 4329 ao += nnz - md; 4330 Iii += nnz; 4331 } 4332 nooffprocentries = mat->nooffprocentries; 4333 mat->nooffprocentries = PETSC_TRUE; 4334 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4335 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4336 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4337 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4338 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4339 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4340 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4341 mat->nooffprocentries = nooffprocentries; 4342 PetscFunctionReturn(PETSC_SUCCESS); 4343 } 4344 4345 /*@C 4346 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4347 (the default parallel PETSc format). For good matrix assembly performance 4348 the user should preallocate the matrix storage by setting the parameters 4349 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4350 4351 Collective 4352 4353 Input Parameters: 4354 + comm - MPI communicator 4355 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4356 This value should be the same as the local size used in creating the 4357 y vector for the matrix-vector product y = Ax. 4358 . n - This value should be the same as the local size used in creating the 4359 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4360 calculated if N is given) For square matrices n is almost always m. 4361 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4362 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4363 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4364 (same value is used for all local rows) 4365 . d_nnz - array containing the number of nonzeros in the various rows of the 4366 DIAGONAL portion of the local submatrix (possibly different for each row) 4367 or `NULL`, if `d_nz` is used to specify the nonzero structure. 
4368 The size of this array is equal to the number of local rows, i.e. 'm'. 4369 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4370 submatrix (same value is used for all local rows). 4371 - o_nnz - array containing the number of nonzeros in the various rows of the 4372 OFF-DIAGONAL portion of the local submatrix (possibly different for 4373 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4374 structure. The size of this array is equal to the number 4375 of local rows, i.e. 'm'. 4376 4377 Output Parameter: 4378 . A - the matrix 4379 4380 Options Database Keys: 4381 + -mat_no_inode - Do not use inodes 4382 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4383 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4384 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix. 4385 Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one `MatMult()` call. 4386 4387 Level: intermediate 4388 4389 Notes: 4390 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4391 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4392 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4393 4394 If the *_nnz parameter is given then the *_nz parameter is ignored. 4395 4396 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4397 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4398 storage requirements for this matrix. 4399 4400 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4401 processor then it must be used on all processors that share the object for 4402 that argument. 4403 4404 The user MUST specify either the local or global matrix dimensions 4405 (possibly both). 4406 4407 The parallel matrix is partitioned across processors such that the 4408 first m0 rows belong to process 0, the next m1 rows belong to 4409 process 1, the next m2 rows belong to process 2, etc., where 4410 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4411 values corresponding to an [m x N] submatrix. 4412 4413 The columns are logically partitioned with the n0 columns belonging 4414 to the 0th partition, the next n1 columns belonging to the next 4415 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4416 4417 The DIAGONAL portion of the local submatrix on any given processor 4418 is the submatrix corresponding to the rows and columns m,n 4419 owned by the given processor, i.e. the diagonal matrix on 4420 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4421 etc. The remaining portion of the local submatrix [m x (N-n)] 4422 constitutes the OFF-DIAGONAL portion. The example below better 4423 illustrates this concept. 4424 4425 For a square global matrix we define each processor's diagonal portion 4426 to be its local rows and the corresponding columns (a square submatrix); 4427 each processor's off-diagonal portion encompasses the remainder of the 4428 local matrix (a rectangular submatrix). 4429 4430 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4431 4432 When calling this routine with a single process communicator, a matrix of 4433 type `MATSEQAIJ` is returned.
If a matrix of type `MATMPIAIJ` is desired for this 4434 type of communicator, use the construction mechanism 4435 .vb 4436 MatCreate(..., &A); 4437 MatSetType(A, MATMPIAIJ); 4438 MatSetSizes(A, m, n, M, N); 4439 MatMPIAIJSetPreallocation(A, ...); 4440 .ve 4441 4442 By default, this format uses inodes (identical nodes) when possible. 4443 We search for consecutive rows with the same nonzero structure, thereby 4444 reusing matrix information to achieve increased efficiency. 4445 4446 Example Usage: 4447 Consider the following 8x8 matrix with 34 nonzero values that is 4448 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4449 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4450 as follows 4451 4452 .vb 4453 1 2 0 | 0 3 0 | 0 4 4454 Proc0 0 5 6 | 7 0 0 | 8 0 4455 9 0 10 | 11 0 0 | 12 0 4456 ------------------------------------- 4457 13 0 14 | 15 16 17 | 0 0 4458 Proc1 0 18 0 | 19 20 21 | 0 0 4459 0 0 0 | 22 23 0 | 24 0 4460 ------------------------------------- 4461 Proc2 25 26 27 | 0 0 28 | 29 0 4462 30 0 0 | 31 32 33 | 0 34 4463 .ve 4464 4465 This can be represented as a collection of submatrices as 4466 4467 .vb 4468 A B C 4469 D E F 4470 G H I 4471 .ve 4472 4473 Here the submatrices A,B,C are owned by proc0, D,E,F are 4474 owned by proc1, and G,H,I are owned by proc2. 4475 4476 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4477 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4478 The 'M','N' parameters are 8,8, and have the same values on all procs. 4479 4480 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4481 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4482 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4483 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4484 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4485 matrix, and [DF] as another `MATSEQAIJ` matrix. 4486 4487 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4488 allocated for every row of the local diagonal submatrix, and `o_nz` 4489 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4490 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per 4491 row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4492 In this case, the values of `d_nz`,`o_nz` are 4493 .vb 4494 proc0 d_nz = 2, o_nz = 2 4495 proc1 d_nz = 3, o_nz = 2 4496 proc2 d_nz = 1, o_nz = 4 4497 .ve 4498 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4499 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4500 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4501 34 values. 4502 4503 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4504 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4505 In the above case the values for `d_nnz`,`o_nnz` are 4506 .vb 4507 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4508 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4509 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4510 .ve 4511 Here the space allocated is the sum of all the above values, i.e. 34, and 4512 hence the preallocation is exact.
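   As a concrete sketch of the call itself (illustrative only; the variable names, the use of `PETSC_COMM_WORLD`, and the omitted assembly calls are assumptions, while the per-row counts come from the `d_nnz`/`o_nnz` table above), the call made on proc1 for the 8x8 example might look like
.vb
     PetscInt d_nnz[3] = {3, 3, 2}, o_nnz[3] = {2, 1, 1}; /* per-row preallocation on proc1, from the table above */
     Mat      A;

     MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A); /* d_nz/o_nz are ignored because d_nnz/o_nnz are given */
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve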
4513 4514 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4515 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4516 @*/ 4517 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4518 { 4519 PetscMPIInt size; 4520 4521 PetscFunctionBegin; 4522 PetscCall(MatCreate(comm, A)); 4523 PetscCall(MatSetSizes(*A, m, n, M, N)); 4524 PetscCallMPI(MPI_Comm_size(comm, &size)); 4525 if (size > 1) { 4526 PetscCall(MatSetType(*A, MATMPIAIJ)); 4527 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4528 } else { 4529 PetscCall(MatSetType(*A, MATSEQAIJ)); 4530 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4531 } 4532 PetscFunctionReturn(PETSC_SUCCESS); 4533 } 4534 4535 /*MC 4536 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4537 4538 Synopsis: 4539 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4540 4541 Not Collective 4542 4543 Input Parameter: 4544 . A - the `MATMPIAIJ` matrix 4545 4546 Output Parameters: 4547 + Ad - the diagonal portion of the matrix 4548 . Ao - the off-diagonal portion of the matrix 4549 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4550 - ierr - error code 4551 4552 Level: advanced 4553 4554 Note: 4555 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`. 4556 4557 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4558 M*/ 4559 4560 /*MC 4561 MatMPIAIJRestoreSeqAIJF90 - Call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4562 4563 Synopsis: 4564 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4565 4566 Not Collective 4567 4568 Input Parameters: 4569 + A - the `MATMPIAIJ` matrix 4570 . Ad - the diagonal portion of the matrix 4571 . Ao - the off-diagonal portion of the matrix 4572 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4573 - ierr - error code 4574 4575 Level: advanced 4576 4577 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4578 M*/ 4579 4580 /*@C 4581 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4582 4583 Not Collective 4584 4585 Input Parameter: 4586 . A - The `MATMPIAIJ` matrix 4587 4588 Output Parameters: 4589 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4590 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4591 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4592 4593 Level: intermediate 4594 4595 Note: 4596 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4597 in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4598 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4599 local column numbers to global column numbers in the original matrix.
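   For illustration, a minimal sketch (error checking omitted; the row number 0 and the variable names are arbitrary choices) of translating the column indices of one row of `Ao` back to global column numbers of `A`:
.vb
     Mat             Ad, Ao;
     const PetscInt *colmap, *cols;
     PetscInt        ncols, j;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
     MatGetRow(Ao, 0, &ncols, &cols, NULL); /* first local row of the off-diagonal block */
     for (j = 0; j < ncols; j++) {
       /* cols[j] lies in [0, Nco); colmap[cols[j]] is the corresponding global column of A */
     }
     MatRestoreRow(Ao, 0, &ncols, &cols, NULL);
.ve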
4600 4601 Fortran Notes: 4602 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4603 4604 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4605 @*/ 4606 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4607 { 4608 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4609 PetscBool flg; 4610 4611 PetscFunctionBegin; 4612 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4613 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4614 if (Ad) *Ad = a->A; 4615 if (Ao) *Ao = a->B; 4616 if (colmap) *colmap = a->garray; 4617 PetscFunctionReturn(PETSC_SUCCESS); 4618 } 4619 4620 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4621 { 4622 PetscInt m, N, i, rstart, nnz, Ii; 4623 PetscInt *indx; 4624 PetscScalar *values; 4625 MatType rootType; 4626 4627 PetscFunctionBegin; 4628 PetscCall(MatGetSize(inmat, &m, &N)); 4629 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4630 PetscInt *dnz, *onz, sum, bs, cbs; 4631 4632 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4633 /* Check sum(n) = N */ 4634 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4635 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4636 4637 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4638 rstart -= m; 4639 4640 MatPreallocateBegin(comm, m, n, dnz, onz); 4641 for (i = 0; i < m; i++) { 4642 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4643 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4644 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4645 } 4646 4647 PetscCall(MatCreate(comm, outmat)); 4648 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4649 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4650 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4651 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4652 PetscCall(MatSetType(*outmat, rootType)); 4653 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4654 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4655 MatPreallocateEnd(dnz, onz); 4656 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4657 } 4658 4659 /* numeric phase */ 4660 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4661 for (i = 0; i < m; i++) { 4662 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4663 Ii = i + rstart; 4664 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4665 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4666 } 4667 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4668 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4669 PetscFunctionReturn(PETSC_SUCCESS); 4670 } 4671 4672 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4673 { 4674 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4675 4676 PetscFunctionBegin; 4677 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4678 PetscCall(PetscFree(merge->id_r)); 4679 PetscCall(PetscFree(merge->len_s)); 4680 PetscCall(PetscFree(merge->len_r)); 4681 PetscCall(PetscFree(merge->bi)); 4682 PetscCall(PetscFree(merge->bj)); 
4683 PetscCall(PetscFree(merge->buf_ri[0])); 4684 PetscCall(PetscFree(merge->buf_ri)); 4685 PetscCall(PetscFree(merge->buf_rj[0])); 4686 PetscCall(PetscFree(merge->buf_rj)); 4687 PetscCall(PetscFree(merge->coi)); 4688 PetscCall(PetscFree(merge->coj)); 4689 PetscCall(PetscFree(merge->owners_co)); 4690 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4691 PetscCall(PetscFree(merge)); 4692 PetscFunctionReturn(PETSC_SUCCESS); 4693 } 4694 4695 #include <../src/mat/utils/freespace.h> 4696 #include <petscbt.h> 4697 4698 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4699 { 4700 MPI_Comm comm; 4701 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4702 PetscMPIInt size, rank, taga, *len_s; 4703 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4704 PetscInt proc, m; 4705 PetscInt **buf_ri, **buf_rj; 4706 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4707 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4708 MPI_Request *s_waits, *r_waits; 4709 MPI_Status *status; 4710 const MatScalar *aa, *a_a; 4711 MatScalar **abuf_r, *ba_i; 4712 Mat_Merge_SeqsToMPI *merge; 4713 PetscContainer container; 4714 4715 PetscFunctionBegin; 4716 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4717 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4718 4719 PetscCallMPI(MPI_Comm_size(comm, &size)); 4720 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4721 4722 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4723 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4724 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4725 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4726 aa = a_a; 4727 4728 bi = merge->bi; 4729 bj = merge->bj; 4730 buf_ri = merge->buf_ri; 4731 buf_rj = merge->buf_rj; 4732 4733 PetscCall(PetscMalloc1(size, &status)); 4734 owners = merge->rowmap->range; 4735 len_s = merge->len_s; 4736 4737 /* send and recv matrix values */ 4738 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4739 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4740 4741 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4742 for (proc = 0, k = 0; proc < size; proc++) { 4743 if (!len_s[proc]) continue; 4744 i = owners[proc]; 4745 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4746 k++; 4747 } 4748 4749 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4750 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4751 PetscCall(PetscFree(status)); 4752 4753 PetscCall(PetscFree(s_waits)); 4754 PetscCall(PetscFree(r_waits)); 4755 4756 /* insert mat values of mpimat */ 4757 PetscCall(PetscMalloc1(N, &ba_i)); 4758 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4759 4760 for (k = 0; k < merge->nrecv; k++) { 4761 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4762 nrows = *(buf_ri_k[k]); 4763 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4764 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4765 } 4766 4767 /* set values of ba */ 4768 m = merge->rowmap->n; 4769 for (i = 0; i < m; i++) { 4770 arow = owners[rank] + i; 4771 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4772 bnzi = bi[i + 1] - bi[i]; 4773 
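    /* ba_i[] is scratch storage for row 'arow' of the merged matrix: it is zeroed below, the local
       contribution from seqmat is accumulated first, then the contributions received from other ranks,
       each matched against the merged (sorted) column pattern bj_i[] by advancing through both index lists */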
PetscCall(PetscArrayzero(ba_i, bnzi)); 4774 4775 /* add local non-zero vals of this proc's seqmat into ba */ 4776 anzi = ai[arow + 1] - ai[arow]; 4777 aj = a->j + ai[arow]; 4778 aa = a_a + ai[arow]; 4779 nextaj = 0; 4780 for (j = 0; nextaj < anzi; j++) { 4781 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4782 ba_i[j] += aa[nextaj++]; 4783 } 4784 } 4785 4786 /* add received vals into ba */ 4787 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4788 /* i-th row */ 4789 if (i == *nextrow[k]) { 4790 anzi = *(nextai[k] + 1) - *nextai[k]; 4791 aj = buf_rj[k] + *(nextai[k]); 4792 aa = abuf_r[k] + *(nextai[k]); 4793 nextaj = 0; 4794 for (j = 0; nextaj < anzi; j++) { 4795 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4796 ba_i[j] += aa[nextaj++]; 4797 } 4798 } 4799 nextrow[k]++; 4800 nextai[k]++; 4801 } 4802 } 4803 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4804 } 4805 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4806 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4807 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4808 4809 PetscCall(PetscFree(abuf_r[0])); 4810 PetscCall(PetscFree(abuf_r)); 4811 PetscCall(PetscFree(ba_i)); 4812 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4813 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4814 PetscFunctionReturn(PETSC_SUCCESS); 4815 } 4816 4817 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4818 { 4819 Mat B_mpi; 4820 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4821 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4822 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4823 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4824 PetscInt len, proc, *dnz, *onz, bs, cbs; 4825 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4826 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4827 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4828 MPI_Status *status; 4829 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4830 PetscBT lnkbt; 4831 Mat_Merge_SeqsToMPI *merge; 4832 PetscContainer container; 4833 4834 PetscFunctionBegin; 4835 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4836 4837 /* make sure it is a PETSc comm */ 4838 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4839 PetscCallMPI(MPI_Comm_size(comm, &size)); 4840 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4841 4842 PetscCall(PetscNew(&merge)); 4843 PetscCall(PetscMalloc1(size, &status)); 4844 4845 /* determine row ownership */ 4846 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4847 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4848 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4849 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4850 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4851 PetscCall(PetscMalloc1(size, &len_si)); 4852 PetscCall(PetscMalloc1(size, &merge->len_s)); 4853 4854 m = merge->rowmap->n; 4855 owners = merge->rowmap->range; 4856 4857 /* determine the number of messages to send, their lengths */ 4858 len_s = merge->len_s; 4859 4860 len = 0; /* length of buf_si[] */ 4861 merge->nsend = 0; 4862 for (proc = 0; proc < size; proc++) { 4863 len_si[proc] = 0; 4864 if (proc == rank) { 4865 len_s[proc] = 0; 4866 } else { 4867 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4868 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4869 } 4870 if (len_s[proc]) { 4871 
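      /* this rank contributes the seqmat rows [owners[proc], owners[proc+1]) to rank 'proc'; the
         i-structure message packs the number of nonempty rows, their local row indices, and the running
         nonzero offsets, hence len_si[proc] = 2*(nrows+1) integers */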
merge->nsend++; 4872 nrows = 0; 4873 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4874 if (ai[i + 1] > ai[i]) nrows++; 4875 } 4876 len_si[proc] = 2 * (nrows + 1); 4877 len += len_si[proc]; 4878 } 4879 } 4880 4881 /* determine the number and length of messages to receive for ij-structure */ 4882 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4883 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4884 4885 /* post the Irecv of j-structure */ 4886 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4887 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4888 4889 /* post the Isend of j-structure */ 4890 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4891 4892 for (proc = 0, k = 0; proc < size; proc++) { 4893 if (!len_s[proc]) continue; 4894 i = owners[proc]; 4895 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4896 k++; 4897 } 4898 4899 /* receives and sends of j-structure are complete */ 4900 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4901 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4902 4903 /* send and recv i-structure */ 4904 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4905 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4906 4907 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4908 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4909 for (proc = 0, k = 0; proc < size; proc++) { 4910 if (!len_s[proc]) continue; 4911 /* form outgoing message for i-structure: 4912 buf_si[0]: nrows to be sent 4913 [1:nrows]: row index (global) 4914 [nrows+1:2*nrows+1]: i-structure index 4915 */ 4916 nrows = len_si[proc] / 2 - 1; 4917 buf_si_i = buf_si + nrows + 1; 4918 buf_si[0] = nrows; 4919 buf_si_i[0] = 0; 4920 nrows = 0; 4921 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4922 anzi = ai[i + 1] - ai[i]; 4923 if (anzi) { 4924 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4925 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4926 nrows++; 4927 } 4928 } 4929 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4930 k++; 4931 buf_si += len_si[proc]; 4932 } 4933 4934 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4935 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4936 4937 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4938 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4939 4940 PetscCall(PetscFree(len_si)); 4941 PetscCall(PetscFree(len_ri)); 4942 PetscCall(PetscFree(rj_waits)); 4943 PetscCall(PetscFree2(si_waits, sj_waits)); 4944 PetscCall(PetscFree(ri_waits)); 4945 PetscCall(PetscFree(buf_s)); 4946 PetscCall(PetscFree(status)); 4947 4948 /* compute a local seq matrix in each processor */ 4949 /* allocate bi array and free space for accumulating nonzero column info */ 4950 PetscCall(PetscMalloc1(m + 1, &bi)); 4951 bi[0] = 0; 4952 4953 /* create and initialize a linked list */ 4954 nlnk = N + 1; 4955 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4956 4957 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4958 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4959 
PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4960 4961 current_space = free_space; 4962 4963 /* determine symbolic info for each local row */ 4964 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4965 4966 for (k = 0; k < merge->nrecv; k++) { 4967 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4968 nrows = *buf_ri_k[k]; 4969 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4970 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4971 } 4972 4973 MatPreallocateBegin(comm, m, n, dnz, onz); 4974 len = 0; 4975 for (i = 0; i < m; i++) { 4976 bnzi = 0; 4977 /* add local non-zero cols of this proc's seqmat into lnk */ 4978 arow = owners[rank] + i; 4979 anzi = ai[arow + 1] - ai[arow]; 4980 aj = a->j + ai[arow]; 4981 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4982 bnzi += nlnk; 4983 /* add received col data into lnk */ 4984 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4985 if (i == *nextrow[k]) { /* i-th row */ 4986 anzi = *(nextai[k] + 1) - *nextai[k]; 4987 aj = buf_rj[k] + *nextai[k]; 4988 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4989 bnzi += nlnk; 4990 nextrow[k]++; 4991 nextai[k]++; 4992 } 4993 } 4994 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4995 4996 /* if free space is not available, make more free space */ 4997 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4998 /* copy data into free space, then initialize lnk */ 4999 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5000 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5001 5002 current_space->array += bnzi; 5003 current_space->local_used += bnzi; 5004 current_space->local_remaining -= bnzi; 5005 5006 bi[i + 1] = bi[i] + bnzi; 5007 } 5008 5009 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5010 5011 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5012 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5013 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5014 5015 /* create symbolic parallel matrix B_mpi */ 5016 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5017 PetscCall(MatCreate(comm, &B_mpi)); 5018 if (n == PETSC_DECIDE) { 5019 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5020 } else { 5021 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5022 } 5023 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5024 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5025 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5026 MatPreallocateEnd(dnz, onz); 5027 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5028 5029 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5030 B_mpi->assembled = PETSC_FALSE; 5031 merge->bi = bi; 5032 merge->bj = bj; 5033 merge->buf_ri = buf_ri; 5034 merge->buf_rj = buf_rj; 5035 merge->coi = NULL; 5036 merge->coj = NULL; 5037 merge->owners_co = NULL; 5038 5039 PetscCall(PetscCommDestroy(&comm)); 5040 5041 /* attach the supporting struct to B_mpi for reuse */ 5042 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5043 PetscCall(PetscContainerSetPointer(container, merge)); 5044 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5045 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", 
(PetscObject)container)); 5046 PetscCall(PetscContainerDestroy(&container)); 5047 *mpimat = B_mpi; 5048 5049 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5050 PetscFunctionReturn(PETSC_SUCCESS); 5051 } 5052 5053 /*@C 5054 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5055 matrices from each processor 5056 5057 Collective 5058 5059 Input Parameters: 5060 + comm - the communicators the parallel matrix will live on 5061 . seqmat - the input sequential matrices 5062 . m - number of local rows (or `PETSC_DECIDE`) 5063 . n - number of local columns (or `PETSC_DECIDE`) 5064 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5065 5066 Output Parameter: 5067 . mpimat - the parallel matrix generated 5068 5069 Level: advanced 5070 5071 Note: 5072 The dimensions of the sequential matrix in each processor MUST be the same. 5073 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5074 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 5075 5076 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5077 @*/ 5078 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5079 { 5080 PetscMPIInt size; 5081 5082 PetscFunctionBegin; 5083 PetscCallMPI(MPI_Comm_size(comm, &size)); 5084 if (size == 1) { 5085 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5086 if (scall == MAT_INITIAL_MATRIX) { 5087 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5088 } else { 5089 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5090 } 5091 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5092 PetscFunctionReturn(PETSC_SUCCESS); 5093 } 5094 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5095 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5096 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5097 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5098 PetscFunctionReturn(PETSC_SUCCESS); 5099 } 5100 5101 /*@ 5102 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5103 5104 Not Collective 5105 5106 Input Parameter: 5107 . A - the matrix 5108 5109 Output Parameter: 5110 . A_loc - the local sequential matrix generated 5111 5112 Level: developer 5113 5114 Notes: 5115 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5116 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5117 `n` is the global column count obtained with `MatGetSize()` 5118 5119 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5120 5121 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5122 5123 Destroy the matrix with `MatDestroy()` 5124 5125 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5126 @*/ 5127 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5128 { 5129 PetscBool mpi; 5130 5131 PetscFunctionBegin; 5132 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5133 if (mpi) { 5134 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5135 } else { 5136 *A_loc = A; 5137 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5138 } 5139 PetscFunctionReturn(PETSC_SUCCESS); 5140 } 5141 5142 /*@ 5143 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 
5144 5145 Not Collective 5146 5147 Input Parameters: 5148 + A - the matrix 5149 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5150 5151 Output Parameter: 5152 . A_loc - the local sequential matrix generated 5153 5154 Level: developer 5155 5156 Notes: 5157 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5158 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5159 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5160 5161 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5162 5163 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5164 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5165 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5166 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5167 5168 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5169 @*/ 5170 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5171 { 5172 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5173 Mat_SeqAIJ *mat, *a, *b; 5174 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5175 const PetscScalar *aa, *ba, *aav, *bav; 5176 PetscScalar *ca, *cam; 5177 PetscMPIInt size; 5178 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5179 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5180 PetscBool match; 5181 5182 PetscFunctionBegin; 5183 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5184 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5185 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5186 if (size == 1) { 5187 if (scall == MAT_INITIAL_MATRIX) { 5188 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5189 *A_loc = mpimat->A; 5190 } else if (scall == MAT_REUSE_MATRIX) { 5191 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5192 } 5193 PetscFunctionReturn(PETSC_SUCCESS); 5194 } 5195 5196 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5197 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5198 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5199 ai = a->i; 5200 aj = a->j; 5201 bi = b->i; 5202 bj = b->j; 5203 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5204 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5205 aa = aav; 5206 ba = bav; 5207 if (scall == MAT_INITIAL_MATRIX) { 5208 PetscCall(PetscMalloc1(1 + am, &ci)); 5209 ci[0] = 0; 5210 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5211 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5212 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5213 k = 0; 5214 for (i = 0; i < am; i++) { 5215 ncols_o = bi[i + 1] - bi[i]; 5216 ncols_d = ai[i + 1] - ai[i]; 5217 /* off-diagonal portion of A */ 5218 for (jo = 0; jo < ncols_o; jo++) { 5219 col = cmap[*bj]; 5220 if (col >= cstart) break; 5221 cj[k] = col; 5222 bj++; 5223 ca[k++] = *ba++; 5224 } 5225 /* diagonal portion of A */ 5226 for (j = 0; j < ncols_d; j++) { 5227 cj[k] = cstart + *aj++; 5228 ca[k++] 
= *aa++; 5229 } 5230 /* off-diagonal portion of A */ 5231 for (j = jo; j < ncols_o; j++) { 5232 cj[k] = cmap[*bj++]; 5233 ca[k++] = *ba++; 5234 } 5235 } 5236 /* put together the new matrix */ 5237 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5238 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5239 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5240 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5241 mat->free_a = PETSC_TRUE; 5242 mat->free_ij = PETSC_TRUE; 5243 mat->nonew = 0; 5244 } else if (scall == MAT_REUSE_MATRIX) { 5245 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5246 ci = mat->i; 5247 cj = mat->j; 5248 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5249 for (i = 0; i < am; i++) { 5250 /* off-diagonal portion of A */ 5251 ncols_o = bi[i + 1] - bi[i]; 5252 for (jo = 0; jo < ncols_o; jo++) { 5253 col = cmap[*bj]; 5254 if (col >= cstart) break; 5255 *cam++ = *ba++; 5256 bj++; 5257 } 5258 /* diagonal portion of A */ 5259 ncols_d = ai[i + 1] - ai[i]; 5260 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5261 /* off-diagonal portion of A */ 5262 for (j = jo; j < ncols_o; j++) { 5263 *cam++ = *ba++; 5264 bj++; 5265 } 5266 } 5267 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5268 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5269 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5270 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5271 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5272 PetscFunctionReturn(PETSC_SUCCESS); 5273 } 5274 5275 /*@ 5276 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5277 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5278 5279 Not Collective 5280 5281 Input Parameters: 5282 + A - the matrix 5283 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5284 5285 Output Parameters: 5286 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5287 - A_loc - the local sequential matrix generated 5288 5289 Level: developer 5290 5291 Note: 5292 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5293 part, then those associated with the off-diagonal part (in its local ordering) 5294 5295 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5296 @*/ 5297 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5298 { 5299 Mat Ao, Ad; 5300 const PetscInt *cmap; 5301 PetscMPIInt size; 5302 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5303 5304 PetscFunctionBegin; 5305 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5306 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5307 if (size == 1) { 5308 if (scall == MAT_INITIAL_MATRIX) { 5309 PetscCall(PetscObjectReference((PetscObject)Ad)); 5310 *A_loc = Ad; 5311 } else if (scall == MAT_REUSE_MATRIX) { 5312 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5313 } 5314 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5315 PetscFunctionReturn(PETSC_SUCCESS); 5316 } 5317 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5318 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5319 if (f) { 5320 PetscCall((*f)(A, scall, glob, A_loc)); 5321 } else { 5322 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5323 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5324 Mat_SeqAIJ *c; 5325 PetscInt *ai = a->i, *aj = a->j; 5326 PetscInt *bi = b->i, *bj = b->j; 5327 PetscInt *ci, *cj; 5328 const PetscScalar *aa, *ba; 5329 PetscScalar *ca; 5330 PetscInt i, j, am, dn, on; 5331 5332 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5333 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5334 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5335 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5336 if (scall == MAT_INITIAL_MATRIX) { 5337 PetscInt k; 5338 PetscCall(PetscMalloc1(1 + am, &ci)); 5339 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5340 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5341 ci[0] = 0; 5342 for (i = 0, k = 0; i < am; i++) { 5343 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5344 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5345 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5346 /* diagonal portion of A */ 5347 for (j = 0; j < ncols_d; j++, k++) { 5348 cj[k] = *aj++; 5349 ca[k] = *aa++; 5350 } 5351 /* off-diagonal portion of A */ 5352 for (j = 0; j < ncols_o; j++, k++) { 5353 cj[k] = dn + *bj++; 5354 ca[k] = *ba++; 5355 } 5356 } 5357 /* put together the new matrix */ 5358 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5359 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5360 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5361 c = (Mat_SeqAIJ *)(*A_loc)->data; 5362 c->free_a = PETSC_TRUE; 5363 c->free_ij = PETSC_TRUE; 5364 c->nonew = 0; 5365 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5366 } else if (scall == MAT_REUSE_MATRIX) { 5367 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5368 for (i = 0; i < am; i++) { 5369 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5370 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5371 /* diagonal portion of A */ 5372 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5373 /* off-diagonal portion of A */ 5374 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5375 } 5376 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5377 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5378 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5379 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5380 if (glob) { 5381 PetscInt cst, *gidx; 5382 5383 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5384 PetscCall(PetscMalloc1(dn + on, &gidx)); 5385 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5386 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5387 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5388 } 5389 } 5390 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5391 PetscFunctionReturn(PETSC_SUCCESS); 5392 } 5393 5394 /*@C 5395 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5396 5397 Not Collective 5398 5399 Input Parameters: 5400 + A - the matrix 5401 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5402 . row - index set of rows to extract (or `NULL`) 5403 - col - index set of columns to extract (or `NULL`) 5404 5405 Output Parameter: 5406 . A_loc - the local sequential matrix generated 5407 5408 Level: developer 5409 5410 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5411 @*/ 5412 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5413 { 5414 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5415 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5416 IS isrowa, iscola; 5417 Mat *aloc; 5418 PetscBool match; 5419 5420 PetscFunctionBegin; 5421 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5422 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5423 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5424 if (!row) { 5425 start = A->rmap->rstart; 5426 end = A->rmap->rend; 5427 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5428 } else { 5429 isrowa = *row; 5430 } 5431 if (!col) { 5432 start = A->cmap->rstart; 5433 cmap = a->garray; 5434 nzA = a->A->cmap->n; 5435 nzB = a->B->cmap->n; 5436 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5437 ncols = 0; 5438 for (i = 0; i < nzB; i++) { 5439 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5440 else break; 5441 } 5442 imark = i; 5443 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5444 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5445 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5446 } else { 5447 iscola = *col; 5448 } 5449 if (scall != MAT_INITIAL_MATRIX) { 5450 PetscCall(PetscMalloc1(1, &aloc)); 5451 aloc[0] = *A_loc; 5452 } 5453 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5454 if (!col) { /* attach global id of condensed columns */ 5455 
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5456 } 5457 *A_loc = aloc[0]; 5458 PetscCall(PetscFree(aloc)); 5459 if (!row) PetscCall(ISDestroy(&isrowa)); 5460 if (!col) PetscCall(ISDestroy(&iscola)); 5461 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5462 PetscFunctionReturn(PETSC_SUCCESS); 5463 } 5464 5465 /* 5466 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5467 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5468 * on a global size. 5469 * */ 5470 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5471 { 5472 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5473 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5474 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5475 PetscMPIInt owner; 5476 PetscSFNode *iremote, *oiremote; 5477 const PetscInt *lrowindices; 5478 PetscSF sf, osf; 5479 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5480 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5481 MPI_Comm comm; 5482 ISLocalToGlobalMapping mapping; 5483 const PetscScalar *pd_a, *po_a; 5484 5485 PetscFunctionBegin; 5486 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5487 /* plocalsize is the number of roots 5488 * nrows is the number of leaves 5489 * */ 5490 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5491 PetscCall(ISGetLocalSize(rows, &nrows)); 5492 PetscCall(PetscCalloc1(nrows, &iremote)); 5493 PetscCall(ISGetIndices(rows, &lrowindices)); 5494 for (i = 0; i < nrows; i++) { 5495 /* Find a remote index and an owner for a row 5496 * The row could be local or remote 5497 * */ 5498 owner = 0; 5499 lidx = 0; 5500 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5501 iremote[i].index = lidx; 5502 iremote[i].rank = owner; 5503 } 5504 /* Create SF to communicate how many nonzero columns for each row */ 5505 PetscCall(PetscSFCreate(comm, &sf)); 5506 /* SF will figure out the number of nonzero columns for each row, and their 5507 * offsets 5508 * */ 5509 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5510 PetscCall(PetscSFSetFromOptions(sf)); 5511 PetscCall(PetscSFSetUp(sf)); 5512 5513 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5514 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5515 PetscCall(PetscCalloc1(nrows, &pnnz)); 5516 roffsets[0] = 0; 5517 roffsets[1] = 0; 5518 for (i = 0; i < plocalsize; i++) { 5519 /* diagonal */ 5520 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5521 /* off-diagonal */ 5522 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5523 /* compute offsets so that we relative location for each row */ 5524 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5525 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5526 } 5527 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5528 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5529 /* 'r' means root, and 'l' means leaf */ 5530 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5531 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5532 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5533 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5534 PetscCall(PetscSFDestroy(&sf)); 5535 
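  /* after these broadcasts each requested row (leaf) knows its diagonal and off-diagonal nonzero
     counts (nlcols) and the offsets of those entries within the owning rank's local arrays (loffsets) */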
PetscCall(PetscFree(roffsets)); 5536 PetscCall(PetscFree(nrcols)); 5537 dntotalcols = 0; 5538 ontotalcols = 0; 5539 ncol = 0; 5540 for (i = 0; i < nrows; i++) { 5541 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5542 ncol = PetscMax(pnnz[i], ncol); 5543 /* diagonal */ 5544 dntotalcols += nlcols[i * 2 + 0]; 5545 /* off-diagonal */ 5546 ontotalcols += nlcols[i * 2 + 1]; 5547 } 5548 /* We do not need to figure the right number of columns 5549 * since all the calculations will be done by going through the raw data 5550 * */ 5551 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5552 PetscCall(MatSetUp(*P_oth)); 5553 PetscCall(PetscFree(pnnz)); 5554 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5555 /* diagonal */ 5556 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5557 /* off-diagonal */ 5558 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5559 /* diagonal */ 5560 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5561 /* off-diagonal */ 5562 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5563 dntotalcols = 0; 5564 ontotalcols = 0; 5565 ntotalcols = 0; 5566 for (i = 0; i < nrows; i++) { 5567 owner = 0; 5568 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5569 /* Set iremote for diag matrix */ 5570 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5571 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5572 iremote[dntotalcols].rank = owner; 5573 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5574 ilocal[dntotalcols++] = ntotalcols++; 5575 } 5576 /* off-diagonal */ 5577 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5578 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5579 oiremote[ontotalcols].rank = owner; 5580 oilocal[ontotalcols++] = ntotalcols++; 5581 } 5582 } 5583 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5584 PetscCall(PetscFree(loffsets)); 5585 PetscCall(PetscFree(nlcols)); 5586 PetscCall(PetscSFCreate(comm, &sf)); 5587 /* P serves as roots and P_oth is leaves 5588 * Diag matrix 5589 * */ 5590 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5591 PetscCall(PetscSFSetFromOptions(sf)); 5592 PetscCall(PetscSFSetUp(sf)); 5593 5594 PetscCall(PetscSFCreate(comm, &osf)); 5595 /* off-diagonal */ 5596 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5597 PetscCall(PetscSFSetFromOptions(osf)); 5598 PetscCall(PetscSFSetUp(osf)); 5599 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5600 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5601 /* operate on the matrix internal data to save memory */ 5602 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5603 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5604 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5605 /* Convert to global indices for diag matrix */ 5606 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5607 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5608 /* We want P_oth store global indices */ 5609 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5610 /* Use memory scalable approach */ 5611 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5612 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5613 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5614 
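  /* both column-index broadcasts travel in global numbering: pd->j was shifted by pcstart above and
     po->j was mapped to global indices through 'mapping'; they are converted back to local numbering
     once the corresponding PetscSFBcastEnd() completes below */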
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5615 /* Convert back to local indices */ 5616 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5617 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5618 nout = 0; 5619 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5620 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5621 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5622 /* Exchange values */ 5623 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5624 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5625 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5626 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5627 /* Stop PETSc from shrinking memory */ 5628 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5629 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5630 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5631 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5632 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5633 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5634 PetscCall(PetscSFDestroy(&sf)); 5635 PetscCall(PetscSFDestroy(&osf)); 5636 PetscFunctionReturn(PETSC_SUCCESS); 5637 } 5638 5639 /* 5640 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5641 * This supports MPIAIJ and MAIJ 5642 * */ 5643 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5644 { 5645 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5646 Mat_SeqAIJ *p_oth; 5647 IS rows, map; 5648 PetscHMapI hamp; 5649 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5650 MPI_Comm comm; 5651 PetscSF sf, osf; 5652 PetscBool has; 5653 5654 PetscFunctionBegin; 5655 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5656 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5657 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5658 * and then create a submatrix (that often is an overlapping matrix) 5659 * */ 5660 if (reuse == MAT_INITIAL_MATRIX) { 5661 /* Use a hash table to figure out unique keys */ 5662 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5663 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5664 count = 0; 5665 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5666 for (i = 0; i < a->B->cmap->n; i++) { 5667 key = a->garray[i] / dof; 5668 PetscCall(PetscHMapIHas(hamp, key, &has)); 5669 if (!has) { 5670 mapping[i] = count; 5671 PetscCall(PetscHMapISet(hamp, key, count++)); 5672 } else { 5673 /* Current 'i' has the same value the previous step */ 5674 mapping[i] = count - 1; 5675 } 5676 } 5677 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5678 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5679 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5680 PetscCall(PetscCalloc1(htsize, &rowindices)); 5681 off = 0; 5682 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5683 PetscCall(PetscHMapIDestroy(&hamp)); 5684 PetscCall(PetscSortInt(htsize, rowindices)); 5685 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5686 /* In case, the matrix was already created but users want to recreate the matrix */ 5687 PetscCall(MatDestroy(P_oth)); 5688 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5689 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5690 PetscCall(ISDestroy(&map)); 5691 PetscCall(ISDestroy(&rows)); 5692 } else if (reuse == MAT_REUSE_MATRIX) { 5693 /* If matrix was already created, we simply update values using SF objects 5694 * that as attached to the matrix earlier. 5695 */ 5696 const PetscScalar *pd_a, *po_a; 5697 5698 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5699 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5700 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5701 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5702 /* Update values in place */ 5703 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5704 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5705 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5706 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5707 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5708 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5709 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5710 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5711 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5712 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5713 PetscFunctionReturn(PETSC_SUCCESS); 5714 } 5715 5716 /*@C 5717 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5718 5719 Collective 5720 5721 Input Parameters: 5722 + A - the first matrix in `MATMPIAIJ` format 5723 . B - the second matrix in `MATMPIAIJ` format 5724 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5725 5726 Output Parameters: 5727 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5728 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5729 - B_seq - the sequential matrix generated 5730 5731 Level: developer 5732 5733 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5734 @*/ 5735 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5736 { 5737 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5738 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5739 IS isrowb, iscolb; 5740 Mat *bseq = NULL; 5741 5742 PetscFunctionBegin; 5743 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5744 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5745 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5746 5747 if (scall == MAT_INITIAL_MATRIX) { 5748 start = A->cmap->rstart; 5749 cmap = a->garray; 5750 nzA = a->A->cmap->n; 5751 nzB = a->B->cmap->n; 5752 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5753 ncols = 0; 5754 for (i = 0; i < nzB; i++) { /* row < local row index */ 5755 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5756 else break; 5757 } 5758 imark = i; 5759 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5760 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5761 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5762 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5763 } else { 5764 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5765 isrowb = *rowb; 5766 iscolb = *colb; 5767 PetscCall(PetscMalloc1(1, &bseq)); 5768 bseq[0] = *B_seq; 5769 } 5770 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5771 *B_seq = bseq[0]; 5772 PetscCall(PetscFree(bseq)); 5773 if (!rowb) { 5774 PetscCall(ISDestroy(&isrowb)); 5775 } else { 5776 *rowb = isrowb; 5777 } 5778 if (!colb) { 5779 PetscCall(ISDestroy(&iscolb)); 5780 } else { 5781 *colb = iscolb; 5782 } 5783 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5784 PetscFunctionReturn(PETSC_SUCCESS); 5785 } 5786 5787 /* 5788 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5789 of the OFF-DIAGONAL portion of local A 5790 5791 Collective 5792 5793 Input Parameters: 5794 + A,B - the matrices in `MATMPIAIJ` format 5795 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5796 5797 Output Parameter: 5798 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5799 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5800 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5801 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5802 5803 Developer Note: 5804 This directly accesses information inside the VecScatter associated with the matrix-vector product 5805 for this matrix. This is not desirable.. 
5806 5807 Level: developer 5808 5809 */ 5810 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5811 { 5812 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5813 Mat_SeqAIJ *b_oth; 5814 VecScatter ctx; 5815 MPI_Comm comm; 5816 const PetscMPIInt *rprocs, *sprocs; 5817 const PetscInt *srow, *rstarts, *sstarts; 5818 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5819 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5820 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5821 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5822 PetscMPIInt size, tag, rank, nreqs; 5823 5824 PetscFunctionBegin; 5825 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5826 PetscCallMPI(MPI_Comm_size(comm, &size)); 5827 5828 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5829 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5830 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5831 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5832 5833 if (size == 1) { 5834 startsj_s = NULL; 5835 bufa_ptr = NULL; 5836 *B_oth = NULL; 5837 PetscFunctionReturn(PETSC_SUCCESS); 5838 } 5839 5840 ctx = a->Mvctx; 5841 tag = ((PetscObject)ctx)->tag; 5842 5843 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5844 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5845 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5846 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5847 PetscCall(PetscMalloc1(nreqs, &reqs)); 5848 rwaits = reqs; 5849 swaits = reqs + nrecvs; 5850 5851 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5852 if (scall == MAT_INITIAL_MATRIX) { 5853 /* i-array */ 5854 /* post receives */ 5855 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5856 for (i = 0; i < nrecvs; i++) { 5857 rowlen = rvalues + rstarts[i] * rbs; 5858 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5859 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5860 } 5861 5862 /* pack the outgoing message */ 5863 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5864 5865 sstartsj[0] = 0; 5866 rstartsj[0] = 0; 5867 len = 0; /* total length of j or a array to be sent */ 5868 if (nsends) { 5869 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5870 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5871 } 5872 for (i = 0; i < nsends; i++) { 5873 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5874 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5875 for (j = 0; j < nrows; j++) { 5876 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5877 for (l = 0; l < sbs; l++) { 5878 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5879 5880 rowlen[j * sbs + l] = ncols; 5881 5882 len += ncols; 5883 
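/* ncols is the number of nonzeros in global row row+l of B (diagonal plus off-diagonal part); record it in rowlen[], which is sent to the receiving rank, and accumulate it into len, the total length of the j- and a-buffers to be sent */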
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5884 } 5885 k++; 5886 } 5887 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5888 5889 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5890 } 5891 /* recvs and sends of i-array are completed */ 5892 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5893 PetscCall(PetscFree(svalues)); 5894 5895 /* allocate buffers for sending j and a arrays */ 5896 PetscCall(PetscMalloc1(len + 1, &bufj)); 5897 PetscCall(PetscMalloc1(len + 1, &bufa)); 5898 5899 /* create i-array of B_oth */ 5900 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5901 5902 b_othi[0] = 0; 5903 len = 0; /* total length of j or a array to be received */ 5904 k = 0; 5905 for (i = 0; i < nrecvs; i++) { 5906 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5907 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5908 for (j = 0; j < nrows; j++) { 5909 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5910 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5911 k++; 5912 } 5913 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5914 } 5915 PetscCall(PetscFree(rvalues)); 5916 5917 /* allocate space for j and a arrays of B_oth */ 5918 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5919 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5920 5921 /* j-array */ 5922 /* post receives of j-array */ 5923 for (i = 0; i < nrecvs; i++) { 5924 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5925 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5926 } 5927 5928 /* pack the outgoing message j-array */ 5929 if (nsends) k = sstarts[0]; 5930 for (i = 0; i < nsends; i++) { 5931 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5932 bufJ = bufj + sstartsj[i]; 5933 for (j = 0; j < nrows; j++) { 5934 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5935 for (ll = 0; ll < sbs; ll++) { 5936 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5937 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5938 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5939 } 5940 } 5941 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5942 } 5943 5944 /* recvs and sends of j-array are completed */ 5945 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5946 } else if (scall == MAT_REUSE_MATRIX) { 5947 sstartsj = *startsj_s; 5948 rstartsj = *startsj_r; 5949 bufa = *bufa_ptr; 5950 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5951 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5952 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5953 5954 /* a-array */ 5955 /* post receives of a-array */ 5956 for (i = 0; i < nrecvs; i++) { 5957 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5958 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5959 } 5960 5961 /* pack the outgoing message a-array */ 5962 if (nsends) k = sstarts[0]; 5963 for (i = 0; i < nsends; i++) { 5964 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5965 bufA = bufa + sstartsj[i]; 5966 for (j = 0; j < nrows; j++) { 5967 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5968 for (ll = 0; ll < sbs; ll++) { 5969 PetscCall(MatGetRow_MPIAIJ(B, row + 
ll, &ncols, NULL, &vals)); 5970 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5971 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5972 } 5973 } 5974 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5975 } 5976 /* recvs and sends of a-array are completed */ 5977 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5978 PetscCall(PetscFree(reqs)); 5979 5980 if (scall == MAT_INITIAL_MATRIX) { 5981 /* put together the new matrix */ 5982 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5983 5984 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5985 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5986 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5987 b_oth->free_a = PETSC_TRUE; 5988 b_oth->free_ij = PETSC_TRUE; 5989 b_oth->nonew = 0; 5990 5991 PetscCall(PetscFree(bufj)); 5992 if (!startsj_s || !bufa_ptr) { 5993 PetscCall(PetscFree2(sstartsj, rstartsj)); 5994 PetscCall(PetscFree(bufa_ptr)); 5995 } else { 5996 *startsj_s = sstartsj; 5997 *startsj_r = rstartsj; 5998 *bufa_ptr = bufa; 5999 } 6000 } else if (scall == MAT_REUSE_MATRIX) { 6001 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6002 } 6003 6004 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6005 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6006 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6007 PetscFunctionReturn(PETSC_SUCCESS); 6008 } 6009 6010 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6011 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6013 #if defined(PETSC_HAVE_MKL_SPARSE) 6014 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6015 #endif 6016 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6018 #if defined(PETSC_HAVE_ELEMENTAL) 6019 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6020 #endif 6021 #if defined(PETSC_HAVE_SCALAPACK) 6022 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6023 #endif 6024 #if defined(PETSC_HAVE_HYPRE) 6025 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6026 #endif 6027 #if defined(PETSC_HAVE_CUDA) 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6029 #endif 6030 #if defined(PETSC_HAVE_HIP) 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6032 #endif 6033 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6035 #endif 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6037 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6038 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6039 6040 /* 6041 Computes (B'*A')' since computing B*A directly is untenable 6042 6043 n p p 6044 [ ] [ ] [ ] 6045 m [ A ] * n [ B ] = m [ C ] 6046 [ ] [ ] 
[ ] 6047 6048 */ 6049 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6050 { 6051 Mat At, Bt, Ct; 6052 6053 PetscFunctionBegin; 6054 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6055 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6056 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6057 PetscCall(MatDestroy(&At)); 6058 PetscCall(MatDestroy(&Bt)); 6059 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6060 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6061 PetscCall(MatDestroy(&Ct)); 6062 PetscFunctionReturn(PETSC_SUCCESS); 6063 } 6064 6065 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6066 { 6067 PetscBool cisdense; 6068 6069 PetscFunctionBegin; 6070 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6071 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6072 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6073 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6074 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6075 PetscCall(MatSetUp(C)); 6076 6077 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6078 PetscFunctionReturn(PETSC_SUCCESS); 6079 } 6080 6081 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6082 { 6083 Mat_Product *product = C->product; 6084 Mat A = product->A, B = product->B; 6085 6086 PetscFunctionBegin; 6087 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6088 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6089 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6090 C->ops->productsymbolic = MatProductSymbolic_AB; 6091 PetscFunctionReturn(PETSC_SUCCESS); 6092 } 6093 6094 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6095 { 6096 Mat_Product *product = C->product; 6097 6098 PetscFunctionBegin; 6099 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6100 PetscFunctionReturn(PETSC_SUCCESS); 6101 } 6102 6103 /* 6104 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6105 6106 Input Parameters: 6107 6108 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6109 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6110 6111 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6112 6113 For Set1, j1[] contains column indices of the nonzeros. 6114 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6115 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6116 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6117 6118 Similar for Set2. 6119 6120 This routine merges the two sets of nonzeros row by row and removes repeats. 6121 6122 Output Parameters: (memory is allocated by the caller) 6123 6124 i[],j[]: the CSR of the merged matrix, which has m rows. 6125 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6126 imap2[]: similar to imap1[], but for Set2. 6127 Note we order nonzeros row-by-row and from left to right. 6128 */ 6129 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6130 { 6131 PetscInt r, m; /* Row index of mat */ 6132 PetscCount t, t1, t2, b1, e1, b2, e2; 6133 6134 PetscFunctionBegin; 6135 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6136 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6137 i[0] = 0; 6138 for (r = 0; r < m; r++) { /* Do row by row merging */ 6139 b1 = rowBegin1[r]; 6140 e1 = rowEnd1[r]; 6141 b2 = rowBegin2[r]; 6142 e2 = rowEnd2[r]; 6143 while (b1 < e1 && b2 < e2) { 6144 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6145 j[t] = j1[b1]; 6146 imap1[t1] = t; 6147 imap2[t2] = t; 6148 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6149 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6150 t1++; 6151 t2++; 6152 t++; 6153 } else if (j1[b1] < j2[b2]) { 6154 j[t] = j1[b1]; 6155 imap1[t1] = t; 6156 b1 += jmap1[t1 + 1] - jmap1[t1]; 6157 t1++; 6158 t++; 6159 } else { 6160 j[t] = j2[b2]; 6161 imap2[t2] = t; 6162 b2 += jmap2[t2 + 1] - jmap2[t2]; 6163 t2++; 6164 t++; 6165 } 6166 } 6167 /* Merge the remaining in either j1[] or j2[] */ 6168 while (b1 < e1) { 6169 j[t] = j1[b1]; 6170 imap1[t1] = t; 6171 b1 += jmap1[t1 + 1] - jmap1[t1]; 6172 t1++; 6173 t++; 6174 } 6175 while (b2 < e2) { 6176 j[t] = j2[b2]; 6177 imap2[t2] = t; 6178 b2 += jmap2[t2 + 1] - jmap2[t2]; 6179 t2++; 6180 t++; 6181 } 6182 i[r + 1] = t; 6183 } 6184 PetscFunctionReturn(PETSC_SUCCESS); 6185 } 6186 6187 /* 6188 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6189 6190 Input Parameters: 6191 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6192 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6193 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6194 6195 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6196 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6197 6198 Output Parameters: 6199 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6200 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6201 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6202 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6203 6204 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6205 Atot: number of entries belonging to the diagonal block. 6206 Annz: number of unique nonzeros belonging to the diagonal block. 6207 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6208 repeats (i.e., same 'i,j' pair). 
6209 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6210 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6211 6212 Atot: number of entries belonging to the diagonal block 6213 Annz: number of unique nonzeros belonging to the diagonal block. 6214 6215 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6216 6217 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6218 */ 6219 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6220 { 6221 PetscInt cstart, cend, rstart, rend, row, col; 6222 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6223 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6224 PetscCount k, m, p, q, r, s, mid; 6225 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6226 6227 PetscFunctionBegin; 6228 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6229 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6230 m = rend - rstart; 6231 6232 /* Skip negative rows */ 6233 for (k = 0; k < n; k++) 6234 if (i[k] >= 0) break; 6235 6236 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6237 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6238 */ 6239 while (k < n) { 6240 row = i[k]; 6241 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6242 for (s = k; s < n; s++) 6243 if (i[s] != row) break; 6244 6245 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6246 for (p = k; p < s; p++) { 6247 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6248 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6249 } 6250 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6251 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6252 rowBegin[row - rstart] = k; 6253 rowMid[row - rstart] = mid; 6254 rowEnd[row - rstart] = s; 6255 6256 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6257 Atot += mid - k; 6258 Btot += s - mid; 6259 6260 /* Count unique nonzeros of this diag row */ 6261 for (p = k; p < mid;) { 6262 col = j[p]; 6263 do { 6264 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6265 p++; 6266 } while (p < mid && j[p] == col); 6267 Annz++; 6268 } 6269 6270 /* Count unique nonzeros of this offdiag row */ 6271 for (p = mid; p < s;) { 6272 col = j[p]; 6273 do { 6274 p++; 6275 } while (p < s && j[p] == col); 6276 Bnnz++; 6277 } 6278 k = s; 6279 } 6280 6281 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6282 PetscCall(PetscMalloc1(Atot, &Aperm)); 6283 PetscCall(PetscMalloc1(Btot, &Bperm)); 6284 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6285 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6286 6287 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6288 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6289 for (r = 0; r < m; r++) { 6290 k = rowBegin[r]; 6291 mid = rowMid[r]; 6292 s = rowEnd[r]; 6293 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6294 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6295 Atot += mid - k; 6296 Btot += s - mid; 6297 6298 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6299 for (p = k; p < mid;) { 6300 col = j[p]; 6301 q = p; 6302 do { 6303 p++; 6304 } while (p < mid && j[p] == col); 6305 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6306 Annz++; 6307 } 6308 6309 for (p = mid; p < s;) { 6310 col = j[p]; 6311 q = p; 6312 do { 6313 p++; 6314 } while (p < s && j[p] == col); 6315 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6316 Bnnz++; 6317 } 6318 } 6319 /* Output */ 6320 *Aperm_ = Aperm; 6321 *Annz_ = Annz; 6322 *Atot_ = Atot; 6323 *Ajmap_ = Ajmap; 6324 *Bperm_ = Bperm; 6325 *Bnnz_ = Bnnz; 6326 *Btot_ = Btot; 6327 *Bjmap_ = Bjmap; 6328 PetscFunctionReturn(PETSC_SUCCESS); 6329 } 6330 6331 /* 6332 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6333 6334 Input Parameters: 6335 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6336 nnz: number of unique nonzeros in the merged matrix 6337 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6338 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6339 6340 Output Parameter: (memory is allocated by the caller) 6341 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6342 6343 Example: 6344 nnz1 = 4 6345 nnz = 6 6346 imap = [1,3,4,5] 6347 jmap = [0,3,5,6,7] 6348 then, 6349 jmap_new = [0,0,3,3,5,6,7] 6350 */ 6351 static PetscErrorCode 
ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6352 { 6353 PetscCount k, p; 6354 6355 PetscFunctionBegin; 6356 jmap_new[0] = 0; 6357 p = nnz; /* p loops over jmap_new[] backwards */ 6358 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6359 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6360 } 6361 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6362 PetscFunctionReturn(PETSC_SUCCESS); 6363 } 6364 6365 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6366 { 6367 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6368 6369 PetscFunctionBegin; 6370 PetscCall(PetscSFDestroy(&coo->sf)); 6371 PetscCall(PetscFree(coo->Aperm1)); 6372 PetscCall(PetscFree(coo->Bperm1)); 6373 PetscCall(PetscFree(coo->Ajmap1)); 6374 PetscCall(PetscFree(coo->Bjmap1)); 6375 PetscCall(PetscFree(coo->Aimap2)); 6376 PetscCall(PetscFree(coo->Bimap2)); 6377 PetscCall(PetscFree(coo->Aperm2)); 6378 PetscCall(PetscFree(coo->Bperm2)); 6379 PetscCall(PetscFree(coo->Ajmap2)); 6380 PetscCall(PetscFree(coo->Bjmap2)); 6381 PetscCall(PetscFree(coo->Cperm1)); 6382 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6383 PetscCall(PetscFree(coo)); 6384 PetscFunctionReturn(PETSC_SUCCESS); 6385 } 6386 6387 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6388 { 6389 MPI_Comm comm; 6390 PetscMPIInt rank, size; 6391 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6392 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6393 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6394 PetscContainer container; 6395 MatCOOStruct_MPIAIJ *coo; 6396 6397 PetscFunctionBegin; 6398 PetscCall(PetscFree(mpiaij->garray)); 6399 PetscCall(VecDestroy(&mpiaij->lvec)); 6400 #if defined(PETSC_USE_CTABLE) 6401 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6402 #else 6403 PetscCall(PetscFree(mpiaij->colmap)); 6404 #endif 6405 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6406 mat->assembled = PETSC_FALSE; 6407 mat->was_assembled = PETSC_FALSE; 6408 6409 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6410 PetscCallMPI(MPI_Comm_size(comm, &size)); 6411 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6412 PetscCall(PetscLayoutSetUp(mat->rmap)); 6413 PetscCall(PetscLayoutSetUp(mat->cmap)); 6414 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6415 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6416 PetscCall(MatGetLocalSize(mat, &m, &n)); 6417 PetscCall(MatGetSize(mat, &M, &N)); 6418 6419 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6420 /* entries come first, then local rows, then remote rows. */ 6421 PetscCount n1 = coo_n, *perm1; 6422 PetscInt *i1 = coo_i, *j1 = coo_j; 6423 6424 PetscCall(PetscMalloc1(n1, &perm1)); 6425 for (k = 0; k < n1; k++) perm1[k] = k; 6426 6427 /* Manipulate indices so that entries with negative row or col indices will have smallest 6428 row indices, local entries will have greater but negative row indices, and remote entries 6429 will have positive row indices. 
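For example, on a rank owning rows [rstart,rend) = [100,200), an entry flagged to be ignored gets i1[k] = PETSC_MIN_INT, a local entry with row index 150 becomes 150 - PETSC_MAX_INT (negative, yet greater than PETSC_MIN_INT), and a remote entry with row index 250 keeps 250; sorting by row index therefore groups the entries as ignored, then local, then remote.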
6430 */ 6431 for (k = 0; k < n1; k++) { 6432 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6433 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6434 else { 6435 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6436 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6437 } 6438 } 6439 6440 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6441 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6442 6443 /* Advance k to the first entry we need to take care of */ 6444 for (k = 0; k < n1; k++) 6445 if (i1[k] > PETSC_MIN_INT) break; 6446 PetscInt i1start = k; 6447 6448 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6449 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6450 6451 /* Send remote rows to their owner */ 6452 /* Find which rows should be sent to which remote ranks*/ 6453 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6454 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6455 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6456 const PetscInt *ranges; 6457 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6458 6459 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6460 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6461 for (k = rem; k < n1;) { 6462 PetscMPIInt owner; 6463 PetscInt firstRow, lastRow; 6464 6465 /* Locate a row range */ 6466 firstRow = i1[k]; /* first row of this owner */ 6467 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6468 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6469 6470 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6471 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6472 6473 /* All entries in [k,p) belong to this remote owner */ 6474 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6475 PetscMPIInt *sendto2; 6476 PetscInt *nentries2; 6477 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6478 6479 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6480 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6481 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6482 PetscCall(PetscFree2(sendto, nentries)); 6483 sendto = sendto2; 6484 nentries = nentries2; 6485 maxNsend = maxNsend2; 6486 } 6487 sendto[nsend] = owner; 6488 nentries[nsend] = p - k; 6489 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6490 nsend++; 6491 k = p; 6492 } 6493 6494 /* Build 1st SF to know offsets on remote to send data */ 6495 PetscSF sf1; 6496 PetscInt nroots = 1, nroots2 = 0; 6497 PetscInt nleaves = nsend, nleaves2 = 0; 6498 PetscInt *offsets; 6499 PetscSFNode *iremote; 6500 6501 PetscCall(PetscSFCreate(comm, &sf1)); 6502 PetscCall(PetscMalloc1(nsend, &iremote)); 6503 PetscCall(PetscMalloc1(nsend, &offsets)); 6504 for (k = 0; k < nsend; k++) { 6505 iremote[k].rank = sendto[k]; 6506 iremote[k].index = 0; 6507 nleaves2 += nentries[k]; 6508 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6509 } 6510 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6511 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6512 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the offsets[] check below catches it */ 6513 PetscCall(PetscSFDestroy(&sf1)); 6514 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6515 6516 /* Build 2nd SF to send remote COOs to their owner */ 6517 PetscSF sf2; 6518 nroots = nroots2; 6519 nleaves = nleaves2; 6520 PetscCall(PetscSFCreate(comm, &sf2)); 6521 PetscCall(PetscSFSetFromOptions(sf2)); 6522 PetscCall(PetscMalloc1(nleaves, &iremote)); 6523 p = 0; 6524 for (k = 0; k < nsend; k++) { 6525 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6526 for (q = 0; q < nentries[k]; q++, p++) { 6527 iremote[p].rank = sendto[k]; 6528 iremote[p].index = offsets[k] + q; 6529 } 6530 } 6531 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6532 6533 /* Send the remote COOs to their owner */ 6534 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6535 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6536 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6537 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6538 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6539 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6540 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6541 6542 PetscCall(PetscFree(offsets)); 6543 PetscCall(PetscFree2(sendto, nentries)); 6544 6545 /* Sort received COOs by row along with the permutation array */ 6546 for (k = 0; k < n2; k++) perm2[k] = k; 6547 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6548 6549 /* sf2
only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6550 PetscCount *Cperm1; 6551 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6552 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6553 6554 /* Support for HYPRE matrices, kind of a hack. 6555 Swap min column with diagonal so that diagonal values will go first */ 6556 PetscBool hypre; 6557 const char *name; 6558 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6559 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6560 if (hypre) { 6561 PetscInt *minj; 6562 PetscBT hasdiag; 6563 6564 PetscCall(PetscBTCreate(m, &hasdiag)); 6565 PetscCall(PetscMalloc1(m, &minj)); 6566 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6567 for (k = i1start; k < rem; k++) { 6568 if (j1[k] < cstart || j1[k] >= cend) continue; 6569 const PetscInt rindex = i1[k] - rstart; 6570 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6571 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6572 } 6573 for (k = 0; k < n2; k++) { 6574 if (j2[k] < cstart || j2[k] >= cend) continue; 6575 const PetscInt rindex = i2[k] - rstart; 6576 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6577 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6578 } 6579 for (k = i1start; k < rem; k++) { 6580 const PetscInt rindex = i1[k] - rstart; 6581 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6582 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6583 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6584 } 6585 for (k = 0; k < n2; k++) { 6586 const PetscInt rindex = i2[k] - rstart; 6587 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6588 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6589 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6590 } 6591 PetscCall(PetscBTDestroy(&hasdiag)); 6592 PetscCall(PetscFree(minj)); 6593 } 6594 6595 /* Split local COOs and received COOs into diag/offdiag portions */ 6596 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6597 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6598 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6599 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6600 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6601 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6602 6603 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6604 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6605 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6606 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6607 6608 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6609 PetscInt *Ai, *Bi; 6610 PetscInt *Aj, *Bj; 6611 6612 PetscCall(PetscMalloc1(m + 1, &Ai)); 6613 PetscCall(PetscMalloc1(m + 1, &Bi)); 6614 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6615 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6616 6617 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6618 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6619 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6620 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6621 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6622 6623 PetscCall(MatMergeEntries_Internal(mat, j1, j2, 
rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6624 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6625 6626 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6627 /* expect nonzeros in A/B most likely have local contributing entries */ 6628 PetscInt Annz = Ai[m]; 6629 PetscInt Bnnz = Bi[m]; 6630 PetscCount *Ajmap1_new, *Bjmap1_new; 6631 6632 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6633 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6634 6635 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6636 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6637 6638 PetscCall(PetscFree(Aimap1)); 6639 PetscCall(PetscFree(Ajmap1)); 6640 PetscCall(PetscFree(Bimap1)); 6641 PetscCall(PetscFree(Bjmap1)); 6642 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6643 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6644 PetscCall(PetscFree(perm1)); 6645 PetscCall(PetscFree3(i2, j2, perm2)); 6646 6647 Ajmap1 = Ajmap1_new; 6648 Bjmap1 = Bjmap1_new; 6649 6650 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6651 if (Annz < Annz1 + Annz2) { 6652 PetscInt *Aj_new; 6653 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6654 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6655 PetscCall(PetscFree(Aj)); 6656 Aj = Aj_new; 6657 } 6658 6659 if (Bnnz < Bnnz1 + Bnnz2) { 6660 PetscInt *Bj_new; 6661 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6662 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6663 PetscCall(PetscFree(Bj)); 6664 Bj = Bj_new; 6665 } 6666 6667 /* Create new submatrices for on-process and off-process coupling */ 6668 PetscScalar *Aa, *Ba; 6669 MatType rtype; 6670 Mat_SeqAIJ *a, *b; 6671 PetscObjectState state; 6672 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6673 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6674 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6675 if (cstart) { 6676 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6677 } 6678 PetscCall(MatDestroy(&mpiaij->A)); 6679 PetscCall(MatDestroy(&mpiaij->B)); 6680 PetscCall(MatGetRootType_Private(mat, &rtype)); 6681 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6682 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6683 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6684 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6685 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6686 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6687 6688 a = (Mat_SeqAIJ *)mpiaij->A->data; 6689 b = (Mat_SeqAIJ *)mpiaij->B->data; 6690 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6691 a->free_a = b->free_a = PETSC_TRUE; 6692 a->free_ij = b->free_ij = PETSC_TRUE; 6693 6694 /* conversion must happen AFTER multiply setup */ 6695 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6696 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6697 PetscCall(VecDestroy(&mpiaij->lvec)); 6698 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6699 6700 // Put the COO struct in a container and then attach that to the matrix 6701 PetscCall(PetscMalloc1(1, &coo)); 6702 coo->n = coo_n; 6703 coo->sf = sf2; 6704 coo->sendlen 
= nleaves; 6705 coo->recvlen = nroots; 6706 coo->Annz = Annz; 6707 coo->Bnnz = Bnnz; 6708 coo->Annz2 = Annz2; 6709 coo->Bnnz2 = Bnnz2; 6710 coo->Atot1 = Atot1; 6711 coo->Atot2 = Atot2; 6712 coo->Btot1 = Btot1; 6713 coo->Btot2 = Btot2; 6714 coo->Ajmap1 = Ajmap1; 6715 coo->Aperm1 = Aperm1; 6716 coo->Bjmap1 = Bjmap1; 6717 coo->Bperm1 = Bperm1; 6718 coo->Aimap2 = Aimap2; 6719 coo->Ajmap2 = Ajmap2; 6720 coo->Aperm2 = Aperm2; 6721 coo->Bimap2 = Bimap2; 6722 coo->Bjmap2 = Bjmap2; 6723 coo->Bperm2 = Bperm2; 6724 coo->Cperm1 = Cperm1; 6725 // Allocate in preallocation. If not used, it has zero cost on host 6726 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6727 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6728 PetscCall(PetscContainerSetPointer(container, coo)); 6729 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6730 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6731 PetscCall(PetscContainerDestroy(&container)); 6732 PetscFunctionReturn(PETSC_SUCCESS); 6733 } 6734 6735 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6736 { 6737 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6738 Mat A = mpiaij->A, B = mpiaij->B; 6739 PetscScalar *Aa, *Ba; 6740 PetscScalar *sendbuf, *recvbuf; 6741 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6742 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6743 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6744 const PetscCount *Cperm1; 6745 PetscContainer container; 6746 MatCOOStruct_MPIAIJ *coo; 6747 6748 PetscFunctionBegin; 6749 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6750 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6751 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6752 sendbuf = coo->sendbuf; 6753 recvbuf = coo->recvbuf; 6754 Ajmap1 = coo->Ajmap1; 6755 Ajmap2 = coo->Ajmap2; 6756 Aimap2 = coo->Aimap2; 6757 Bjmap1 = coo->Bjmap1; 6758 Bjmap2 = coo->Bjmap2; 6759 Bimap2 = coo->Bimap2; 6760 Aperm1 = coo->Aperm1; 6761 Aperm2 = coo->Aperm2; 6762 Bperm1 = coo->Bperm1; 6763 Bperm2 = coo->Bperm2; 6764 Cperm1 = coo->Cperm1; 6765 6766 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6767 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6768 6769 /* Pack entries to be sent to remote */ 6770 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6771 6772 /* Send remote entries to their owner and overlap the communication with local computation */ 6773 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6774 /* Add local entries to A and B */ 6775 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6776 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6777 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6778 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6779 } 6780 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6781 PetscScalar sum = 0.0; 6782 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6783 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6784 } 6785 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6786 6787 /* Add received remote entries to A and B */ 6788 for (PetscCount i = 0; i < coo->Annz2; i++) { 6789 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6790 } 6791 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6792 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6793 } 6794 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6795 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6796 PetscFunctionReturn(PETSC_SUCCESS); 6797 } 6798 6799 /*MC 6800 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6801 6802 Options Database Keys: 6803 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6804 6805 Level: beginner 6806 6807 Notes: 6808 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6809 in this case the values associated with the rows and columns one passes in are set to zero 6810 in the matrix 6811 6812 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6813 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6814 6815 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6816 M*/ 6817 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6818 { 6819 Mat_MPIAIJ *b; 6820 PetscMPIInt size; 6821 6822 PetscFunctionBegin; 6823 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6824 6825 PetscCall(PetscNew(&b)); 6826 B->data = (void *)b; 6827 B->ops[0] = MatOps_Values; 6828 B->assembled = PETSC_FALSE; 6829 B->insertmode = NOT_SET_VALUES; 6830 b->size = size; 6831 6832 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6833 6834 /* build cache for off array entries formed */ 6835 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6836 6837 b->donotstash = PETSC_FALSE; 6838 b->colmap = NULL; 6839 b->garray = NULL; 6840 b->roworiented = PETSC_TRUE; 6841 6842 /* stuff used for matrix vector multiply */ 6843 b->lvec = NULL; 6844 b->Mvctx = NULL; 6845 6846 /* stuff for MatGetRow() */ 6847 b->rowindices = NULL; 6848 b->rowvalues = NULL; 6849 b->getrowactive = PETSC_FALSE; 6850 6851 /* flexible pointer used in CUSPARSE classes */ 6852 b->spptr = NULL; 6853 6854 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6860 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6862 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6863 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6864 #if defined(PETSC_HAVE_CUDA) 6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6866 #endif 6867 #if defined(PETSC_HAVE_HIP) 6868 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6869 #endif 6870 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6871 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6872 #endif 6873 #if defined(PETSC_HAVE_MKL_SPARSE) 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6875 #endif 6876 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6877 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6878 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6879 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6880 #if defined(PETSC_HAVE_ELEMENTAL) 6881 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6882 #endif 6883 #if defined(PETSC_HAVE_SCALAPACK) 6884 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6885 #endif 6886 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6888 #if defined(PETSC_HAVE_HYPRE) 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6891 #endif 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6896 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6897 PetscFunctionReturn(PETSC_SUCCESS); 6898 } 6899 6900 /*@C 6901 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6902 and "off-diagonal" part of the matrix in CSR format. 6903 6904 Collective 6905 6906 Input Parameters: 6907 + comm - MPI communicator 6908 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6909 . n - This value should be the same as the local size used in creating the 6910 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6911 calculated if `N` is given) For square matrices `n` is almost always `m`. 6912 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6913 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6914 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6915 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6916 . a - matrix values 6917 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6918 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6919 - oa - matrix values 6920 6921 Output Parameter: 6922 . mat - the matrix 6923 6924 Level: advanced 6925 6926 Notes: 6927 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6928 must free the arrays once the matrix has been destroyed and not before. 6929 6930 The `i` and `j` indices are 0 based 6931 6932 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6933 6934 This sets local rows and cannot be used to set off-processor values. 6935 6936 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6937 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6938 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6939 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6940 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6941 communication if it is known that only local entries will be set. 
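A minimal sketch of the split layout (the sizes and numerical values below are purely illustrative): with two MPI processes, each owning one row and one column of the global 2 x 2 matrix [1 2; 3 4], one could pass

.vb
  rank 0: i[] = {0,1}, j[] = {0}, a[] = {1.0}   oi[] = {0,1}, oj[] = {1}, oa[] = {2.0}
  rank 1: i[] = {0,1}, j[] = {0}, a[] = {4.0}   oi[] = {0,1}, oj[] = {0}, oa[] = {3.0}
  MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 1, 1, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &mat);
.ve

Note that the diagonal block uses local column indices while the off-diagonal block uses global column indices, as described above.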
6942 6943 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6944 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6945 @*/ 6946 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6947 { 6948 Mat_MPIAIJ *maij; 6949 6950 PetscFunctionBegin; 6951 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6952 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6953 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6954 PetscCall(MatCreate(comm, mat)); 6955 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6956 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6957 maij = (Mat_MPIAIJ *)(*mat)->data; 6958 6959 (*mat)->preallocated = PETSC_TRUE; 6960 6961 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6962 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6963 6964 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6965 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6966 6967 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6968 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6969 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6970 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6971 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6972 PetscFunctionReturn(PETSC_SUCCESS); 6973 } 6974 6975 typedef struct { 6976 Mat *mp; /* intermediate products */ 6977 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6978 PetscInt cp; /* number of intermediate products */ 6979 6980 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6981 PetscInt *startsj_s, *startsj_r; 6982 PetscScalar *bufa; 6983 Mat P_oth; 6984 6985 /* may take advantage of merging product->B */ 6986 Mat Bloc; /* B-local by merging diag and off-diag */ 6987 6988 /* cusparse does not have support to split between symbolic and numeric phases. 6989 When api_user is true, we don't need to update the numerical values 6990 of the temporary storage */ 6991 PetscBool reusesym; 6992 6993 /* support for COO values insertion */ 6994 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6995 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6996 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6997 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6998 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6999 PetscMemType mtype; 7000 7001 /* customization */ 7002 PetscBool abmerge; 7003 PetscBool P_oth_bind; 7004 } MatMatMPIAIJBACKEND; 7005 7006 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7007 { 7008 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7009 PetscInt i; 7010 7011 PetscFunctionBegin; 7012 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7013 PetscCall(PetscFree(mmdata->bufa)); 7014 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7015 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7016 PetscCall(MatDestroy(&mmdata->P_oth)); 7017 PetscCall(MatDestroy(&mmdata->Bloc)); 7018 PetscCall(PetscSFDestroy(&mmdata->sf)); 7019 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7020 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7021 PetscCall(PetscFree(mmdata->own[0])); 7022 PetscCall(PetscFree(mmdata->own)); 7023 PetscCall(PetscFree(mmdata->off[0])); 7024 PetscCall(PetscFree(mmdata->off)); 7025 PetscCall(PetscFree(mmdata)); 7026 PetscFunctionReturn(PETSC_SUCCESS); 7027 } 7028 7029 /* Copy selected n entries with indices in idx[] of A to v[]. 7030 If idx is NULL, copy the whole data array of A to v[] 7031 */ 7032 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7033 { 7034 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7035 7036 PetscFunctionBegin; 7037 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7038 if (f) { 7039 PetscCall((*f)(A, n, idx, v)); 7040 } else { 7041 const PetscScalar *vv; 7042 7043 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7044 if (n && idx) { 7045 PetscScalar *w = v; 7046 const PetscInt *oi = idx; 7047 PetscInt j; 7048 7049 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7050 } else { 7051 PetscCall(PetscArraycpy(v, vv, n)); 7052 } 7053 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7054 } 7055 PetscFunctionReturn(PETSC_SUCCESS); 7056 } 7057 7058 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7059 { 7060 MatMatMPIAIJBACKEND *mmdata; 7061 PetscInt i, n_d, n_o; 7062 7063 PetscFunctionBegin; 7064 MatCheckProduct(C, 1); 7065 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7066 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7067 if (!mmdata->reusesym) { /* update temporary matrices */ 7068 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7069 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7070 } 7071 mmdata->reusesym = PETSC_FALSE; 7072 7073 for (i = 0; i < mmdata->cp; i++) { 7074 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7075 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7076 } 7077 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7078 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7079 7080 if (mmdata->mptmp[i]) continue; 7081 if (noff) { 7082 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7083 7084 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7085 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7086 n_o += noff; 7087 n_d += nown; 7088 } else { 7089 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7090 7091 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7092 n_d += mm->nz; 7093 } 7094 } 7095 if (mmdata->hasoffproc) { /* offprocess insertion */ 7096 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7097 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7098 } 7099 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7100 PetscFunctionReturn(PETSC_SUCCESS); 7101 } 7102 7103 /* Support for Pt * A, A * P, or Pt * A * P */ 7104 #define MAX_NUMBER_INTERMEDIATE 4 7105 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7106 { 7107 Mat_Product *product = C->product; 7108 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7109 Mat_MPIAIJ *a, *p; 7110 MatMatMPIAIJBACKEND *mmdata; 7111 ISLocalToGlobalMapping P_oth_l2g = NULL; 7112 IS glob = NULL; 7113 const char *prefix; 7114 char pprefix[256]; 7115 const PetscInt *globidx, *P_oth_idx; 7116 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7117 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7118 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7119 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7120 /* a base offset; type-2: sparse with a local to global map table */ 7121 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7122 7123 MatProductType ptype; 7124 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7125 PetscMPIInt size; 7126 7127 PetscFunctionBegin; 7128 MatCheckProduct(C, 1); 7129 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7130 ptype = product->type; 7131 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7132 ptype = MATPRODUCT_AB; 7133 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7134 } 7135 switch (ptype) { 7136 case MATPRODUCT_AB: 7137 A = product->A; 7138 P = product->B; 7139 m = A->rmap->n; 7140 n = P->cmap->n; 7141 M = A->rmap->N; 7142 N = P->cmap->N; 7143 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7144 break; 7145 case MATPRODUCT_AtB: 7146 P = product->A; 7147 A = product->B; 7148 m = P->cmap->n; 7149 n = A->cmap->n; 7150 M = P->cmap->N; 7151 N = A->cmap->N; 7152 hasoffproc = PETSC_TRUE; 7153 break; 7154 case MATPRODUCT_PtAP: 7155 A = product->A; 7156 P = product->B; 7157 m = P->cmap->n; 7158 n = P->cmap->n; 7159 M = P->cmap->N; 7160 N = P->cmap->N; 7161 hasoffproc = PETSC_TRUE; 7162 break; 7163 default: 7164 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7165 } 7166 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7167 if (size == 1) hasoffproc = PETSC_FALSE; 7168 7169 /* defaults */ 7170 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7171 mp[i] = NULL; 7172 mptmp[i] = PETSC_FALSE; 7173 rmapt[i] = -1; 7174 cmapt[i] = -1; 7175 rmapa[i] = NULL; 7176 cmapa[i] = NULL; 7177 } 7178 7179 /* customization */ 7180 PetscCall(PetscNew(&mmdata)); 7181 mmdata->reusesym = product->api_user; 7182 if (ptype == MATPRODUCT_AB) { 7183 if (product->api_user) { 7184 
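/* the product was requested through the MatMatMult()-style user API, so the backend options below use the classic -matmatmult_ prefix */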
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7185 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7186 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7187 PetscOptionsEnd(); 7188 } else { 7189 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7190 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7191 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7192 PetscOptionsEnd(); 7193 } 7194 } else if (ptype == MATPRODUCT_PtAP) { 7195 if (product->api_user) { 7196 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7197 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7198 PetscOptionsEnd(); 7199 } else { 7200 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7201 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7202 PetscOptionsEnd(); 7203 } 7204 } 7205 a = (Mat_MPIAIJ *)A->data; 7206 p = (Mat_MPIAIJ *)P->data; 7207 PetscCall(MatSetSizes(C, m, n, M, N)); 7208 PetscCall(PetscLayoutSetUp(C->rmap)); 7209 PetscCall(PetscLayoutSetUp(C->cmap)); 7210 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7211 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7212 7213 cp = 0; 7214 switch (ptype) { 7215 case MATPRODUCT_AB: /* A * P */ 7216 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7217 7218 /* A_diag * P_local (merged or not) */ 7219 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7220 /* P is product->B */ 7221 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7222 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7223 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7224 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7225 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7226 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7227 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7228 mp[cp]->product->api_user = product->api_user; 7229 PetscCall(MatProductSetFromOptions(mp[cp])); 7230 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7231 PetscCall(ISGetIndices(glob, &globidx)); 7232 rmapt[cp] = 1; 7233 cmapt[cp] = 2; 7234 cmapa[cp] = globidx; 7235 mptmp[cp] = PETSC_FALSE; 7236 cp++; 7237 } else { /* A_diag * P_diag and A_diag * P_off */ 7238 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7239 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7240 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7241 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7242 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7243 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7244 mp[cp]->product->api_user = 
product->api_user; 7245 PetscCall(MatProductSetFromOptions(mp[cp])); 7246 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7247 rmapt[cp] = 1; 7248 cmapt[cp] = 1; 7249 mptmp[cp] = PETSC_FALSE; 7250 cp++; 7251 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7252 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7253 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7254 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7255 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7256 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7257 mp[cp]->product->api_user = product->api_user; 7258 PetscCall(MatProductSetFromOptions(mp[cp])); 7259 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7260 rmapt[cp] = 1; 7261 cmapt[cp] = 2; 7262 cmapa[cp] = p->garray; 7263 mptmp[cp] = PETSC_FALSE; 7264 cp++; 7265 } 7266 7267 /* A_off * P_other */ 7268 if (mmdata->P_oth) { 7269 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7270 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7271 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7272 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7273 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7274 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7275 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7276 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7277 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7278 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7279 mp[cp]->product->api_user = product->api_user; 7280 PetscCall(MatProductSetFromOptions(mp[cp])); 7281 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7282 rmapt[cp] = 1; 7283 cmapt[cp] = 2; 7284 cmapa[cp] = P_oth_idx; 7285 mptmp[cp] = PETSC_FALSE; 7286 cp++; 7287 } 7288 break; 7289 7290 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7291 /* A is product->B */ 7292 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7293 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7294 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7295 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7296 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7297 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7298 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7299 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7300 mp[cp]->product->api_user = product->api_user; 7301 PetscCall(MatProductSetFromOptions(mp[cp])); 7302 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7303 PetscCall(ISGetIndices(glob, &globidx)); 7304 rmapt[cp] = 2; 7305 rmapa[cp] = globidx; 7306 cmapt[cp] = 2; 7307 cmapa[cp] = globidx; 7308 mptmp[cp] = PETSC_FALSE; 7309 cp++; 7310 } else { 7311 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7312 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7313 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7314 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7315 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7316 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7317 mp[cp]->product->api_user = product->api_user; 7318 PetscCall(MatProductSetFromOptions(mp[cp])); 7319 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7320 PetscCall(ISGetIndices(glob, &globidx)); 
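/* mp[cp] = (P_diag)^T * A_loc: its rows are the locally owned rows of C (consecutive, type-1), and its columns are those of the merged local matrix of A, mapped to global indices through globidx (type-2) */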
7321 rmapt[cp] = 1; 7322 cmapt[cp] = 2; 7323 cmapa[cp] = globidx; 7324 mptmp[cp] = PETSC_FALSE; 7325 cp++; 7326 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7327 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7328 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7329 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7330 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7331 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7332 mp[cp]->product->api_user = product->api_user; 7333 PetscCall(MatProductSetFromOptions(mp[cp])); 7334 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7335 rmapt[cp] = 2; 7336 rmapa[cp] = p->garray; 7337 cmapt[cp] = 2; 7338 cmapa[cp] = globidx; 7339 mptmp[cp] = PETSC_FALSE; 7340 cp++; 7341 } 7342 break; 7343 case MATPRODUCT_PtAP: 7344 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7345 /* P is product->B */ 7346 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7347 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7348 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7349 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7350 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7351 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7352 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7353 mp[cp]->product->api_user = product->api_user; 7354 PetscCall(MatProductSetFromOptions(mp[cp])); 7355 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7356 PetscCall(ISGetIndices(glob, &globidx)); 7357 rmapt[cp] = 2; 7358 rmapa[cp] = globidx; 7359 cmapt[cp] = 2; 7360 cmapa[cp] = globidx; 7361 mptmp[cp] = PETSC_FALSE; 7362 cp++; 7363 if (mmdata->P_oth) { 7364 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7365 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7366 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7367 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7368 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7369 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7370 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7371 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7372 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7373 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7374 mp[cp]->product->api_user = product->api_user; 7375 PetscCall(MatProductSetFromOptions(mp[cp])); 7376 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7377 mptmp[cp] = PETSC_TRUE; 7378 cp++; 7379 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7380 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7381 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7382 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7383 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7384 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7385 mp[cp]->product->api_user = product->api_user; 7386 PetscCall(MatProductSetFromOptions(mp[cp])); 7387 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7388 rmapt[cp] = 2; 7389 rmapa[cp] = globidx; 7390 cmapt[cp] = 2; 7391 cmapa[cp] = P_oth_idx; 7392 mptmp[cp] = PETSC_FALSE; 7393 cp++; 7394 } 7395 break; 7396 default: 7397 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]); 7398 } 7399 /* sanity check */ 7400 if (size > 1) 7401 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7402 7403 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7404 for (i = 0; i < cp; i++) { 7405 mmdata->mp[i] = mp[i]; 7406 mmdata->mptmp[i] = mptmp[i]; 7407 } 7408 mmdata->cp = cp; 7409 C->product->data = mmdata; 7410 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7411 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7412 7413 /* memory type */ 7414 mmdata->mtype = PETSC_MEMTYPE_HOST; 7415 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7416 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7417 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7418 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7419 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7420 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7421 7422 /* prepare coo coordinates for values insertion */ 7423 7424 /* count total nonzeros of those intermediate seqaij Mats 7425 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7426 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote processes 7427 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7428 */ 7429 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7430 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7431 if (mptmp[cp]) continue; 7432 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7433 const PetscInt *rmap = rmapa[cp]; 7434 const PetscInt mr = mp[cp]->rmap->n; 7435 const PetscInt rs = C->rmap->rstart; 7436 const PetscInt re = C->rmap->rend; 7437 const PetscInt *ii = mm->i; 7438 for (i = 0; i < mr; i++) { 7439 const PetscInt gr = rmap[i]; 7440 const PetscInt nz = ii[i + 1] - ii[i]; 7441 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7442 else ncoo_oown += nz; /* this row is local */ 7443 } 7444 } else ncoo_d += mm->nz; 7445 } 7446 7447 /* 7448 ncoo: total number of nonzeros (including those inserted by remote processes) belonging to this process 7449 7450 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this process by other processes. 7451 7452 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7453 7454 off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes 7455 own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally 7456 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7457 7458 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this process. 7459 E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of local nonzeros, and the remaining part stores the row indices of nonzeros this process will receive.
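     An illustration with made-up numbers: suppose cp = 2, mp[0] has a sparse row map (rmapt[0] == 2) with 3 nonzeros falling on remote rows of C and 2 on locally owned rows, while mp[1] has a consecutive row map (rmapt[1] == 1) with 5 nonzeros. Then ncoo_o = 3, ncoo_oown = 2, ncoo_d = 5, off[1]-off[0] = 3, off[2]-off[1] = 0, own[1]-own[0] = 2, own[2]-own[1] = 0, and ncoo = 5 + 2 + ncoo2, where the last ncoo2 entries of coo_i/j/v are filled by the PetscSF gather from other processes.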
7460 */ 7461 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7462 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7463 7464 /* gather (i,j) of nonzeros inserted by remote procs */ 7465 if (hasoffproc) { 7466 PetscSF msf; 7467 PetscInt ncoo2, *coo_i2, *coo_j2; 7468 7469 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7470 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7471 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7472 7473 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7474 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7475 PetscInt *idxoff = mmdata->off[cp]; 7476 PetscInt *idxown = mmdata->own[cp]; 7477 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7478 const PetscInt *rmap = rmapa[cp]; 7479 const PetscInt *cmap = cmapa[cp]; 7480 const PetscInt *ii = mm->i; 7481 PetscInt *coi = coo_i + ncoo_o; 7482 PetscInt *coj = coo_j + ncoo_o; 7483 const PetscInt mr = mp[cp]->rmap->n; 7484 const PetscInt rs = C->rmap->rstart; 7485 const PetscInt re = C->rmap->rend; 7486 const PetscInt cs = C->cmap->rstart; 7487 for (i = 0; i < mr; i++) { 7488 const PetscInt *jj = mm->j + ii[i]; 7489 const PetscInt gr = rmap[i]; 7490 const PetscInt nz = ii[i + 1] - ii[i]; 7491 if (gr < rs || gr >= re) { /* this is an offproc row */ 7492 for (j = ii[i]; j < ii[i + 1]; j++) { 7493 *coi++ = gr; 7494 *idxoff++ = j; 7495 } 7496 if (!cmapt[cp]) { /* already global */ 7497 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7498 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7499 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7500 } else { /* offdiag */ 7501 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7502 } 7503 ncoo_o += nz; 7504 } else { /* this is a local row */ 7505 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7506 } 7507 } 7508 } 7509 mmdata->off[cp + 1] = idxoff; 7510 mmdata->own[cp + 1] = idxown; 7511 } 7512 7513 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7514 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7515 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7516 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7517 ncoo = ncoo_d + ncoo_oown + ncoo2; 7518 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7519 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7520 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7521 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7522 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7523 PetscCall(PetscFree2(coo_i, coo_j)); 7524 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7525 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7526 coo_i = coo_i2; 7527 coo_j = coo_j2; 7528 } else { /* no offproc values insertion */ 7529 ncoo = ncoo_d; 7530 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7531 7532 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7533 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7534 PetscCall(PetscSFSetUp(mmdata->sf)); 7535 } 7536 mmdata->hasoffproc = hasoffproc; 7537 7538 /* gather (i,j) of nonzeros 
inserted locally */ 7539 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7540 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7541 PetscInt *coi = coo_i + ncoo_d; 7542 PetscInt *coj = coo_j + ncoo_d; 7543 const PetscInt *jj = mm->j; 7544 const PetscInt *ii = mm->i; 7545 const PetscInt *cmap = cmapa[cp]; 7546 const PetscInt *rmap = rmapa[cp]; 7547 const PetscInt mr = mp[cp]->rmap->n; 7548 const PetscInt rs = C->rmap->rstart; 7549 const PetscInt re = C->rmap->rend; 7550 const PetscInt cs = C->cmap->rstart; 7551 7552 if (mptmp[cp]) continue; 7553 if (rmapt[cp] == 1) { /* consecutive rows */ 7554 /* fill coo_i */ 7555 for (i = 0; i < mr; i++) { 7556 const PetscInt gr = i + rs; 7557 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7558 } 7559 /* fill coo_j */ 7560 if (!cmapt[cp]) { /* type-0, already global */ 7561 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7562 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7563 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7564 } else { /* type-2, local to global for sparse columns */ 7565 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7566 } 7567 ncoo_d += mm->nz; 7568 } else if (rmapt[cp] == 2) { /* sparse rows */ 7569 for (i = 0; i < mr; i++) { 7570 const PetscInt *jj = mm->j + ii[i]; 7571 const PetscInt gr = rmap[i]; 7572 const PetscInt nz = ii[i + 1] - ii[i]; 7573 if (gr >= rs && gr < re) { /* local rows */ 7574 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7575 if (!cmapt[cp]) { /* type-0, already global */ 7576 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7577 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7578 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7579 } else { /* type-2, local to global for sparse columns */ 7580 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7581 } 7582 ncoo_d += nz; 7583 } 7584 } 7585 } 7586 } 7587 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7588 PetscCall(ISDestroy(&glob)); 7589 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7590 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7591 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7592 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7593 7594 /* preallocate with COO data */ 7595 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7596 PetscCall(PetscFree2(coo_i, coo_j)); 7597 PetscFunctionReturn(PETSC_SUCCESS); 7598 } 7599 7600 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7601 { 7602 Mat_Product *product = mat->product; 7603 #if defined(PETSC_HAVE_DEVICE) 7604 PetscBool match = PETSC_FALSE; 7605 PetscBool usecpu = PETSC_FALSE; 7606 #else 7607 PetscBool match = PETSC_TRUE; 7608 #endif 7609 7610 PetscFunctionBegin; 7611 MatCheckProduct(mat, 1); 7612 #if defined(PETSC_HAVE_DEVICE) 7613 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7614 if (match) { /* we can always fallback to the CPU if requested */ 7615 switch (product->type) { 7616 case MATPRODUCT_AB: 7617 if (product->api_user) { 7618 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7619 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7620 PetscOptionsEnd(); 7621 } else { 7622 
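/* driven through the generic MatProduct interface, so the equivalent backend option uses the -mat_product_algorithm_ prefix */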
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7623 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7624 PetscOptionsEnd(); 7625 } 7626 break; 7627 case MATPRODUCT_AtB: 7628 if (product->api_user) { 7629 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7630 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7631 PetscOptionsEnd(); 7632 } else { 7633 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7634 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7635 PetscOptionsEnd(); 7636 } 7637 break; 7638 case MATPRODUCT_PtAP: 7639 if (product->api_user) { 7640 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7641 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7642 PetscOptionsEnd(); 7643 } else { 7644 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7645 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7646 PetscOptionsEnd(); 7647 } 7648 break; 7649 default: 7650 break; 7651 } 7652 match = (PetscBool)!usecpu; 7653 } 7654 #endif 7655 if (match) { 7656 switch (product->type) { 7657 case MATPRODUCT_AB: 7658 case MATPRODUCT_AtB: 7659 case MATPRODUCT_PtAP: 7660 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7661 break; 7662 default: 7663 break; 7664 } 7665 } 7666 /* fallback to MPIAIJ ops */ 7667 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7668 PetscFunctionReturn(PETSC_SUCCESS); 7669 } 7670 7671 /* 7672 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7673 7674 n - the number of block indices in cc[] 7675 cc - the block indices (must be large enough to contain the indices) 7676 */ 7677 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7678 { 7679 PetscInt cnt = -1, nidx, j; 7680 const PetscInt *idx; 7681 7682 PetscFunctionBegin; 7683 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7684 if (nidx) { 7685 cnt = 0; 7686 cc[cnt] = idx[0] / bs; 7687 for (j = 1; j < nidx; j++) { 7688 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7689 } 7690 } 7691 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7692 *n = cnt + 1; 7693 PetscFunctionReturn(PETSC_SUCCESS); 7694 } 7695 7696 /* 7697 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7698 7699 ncollapsed - the number of block indices 7700 collapsed - the block indices (must be large enough to contain the indices) 7701 */ 7702 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7703 { 7704 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7705 7706 PetscFunctionBegin; 7707 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7708 for (i = start + 1; i < start + bs; i++) { 7709 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
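/* merge this row's block-column set into the running union, then swap the buffers so cprev always holds the merged result */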
7710 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7711 cprevtmp = cprev; 7712 cprev = merged; 7713 merged = cprevtmp; 7714 } 7715 *ncollapsed = nprev; 7716 if (collapsed) *collapsed = cprev; 7717 PetscFunctionReturn(PETSC_SUCCESS); 7718 } 7719 7720 /* 7721 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix 7722 7723 Input Parameters: 7724 + Amat - matrix 7725 . symmetrize - make the result symmetric 7726 . scale - scale with diagonal - filter - if nonnegative, filter out graph entries below this value 7727 7728 Output Parameter: 7729 . a_Gmat - output scalar graph (entries are >= 0) 7730 7731 */ 7732 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7733 { 7734 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7735 MPI_Comm comm; 7736 Mat Gmat; 7737 PetscBool ismpiaij, isseqaij; 7738 Mat a, b, c; 7739 MatType jtype; 7740 7741 PetscFunctionBegin; 7742 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7743 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7744 PetscCall(MatGetSize(Amat, &MM, &NN)); 7745 PetscCall(MatGetBlockSize(Amat, &bs)); 7746 nloc = (Iend - Istart) / bs; 7747 7748 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7749 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7750 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7751 7752 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7753 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7754 implementation */ 7755 if (bs > 1) { 7756 PetscCall(MatGetType(Amat, &jtype)); 7757 PetscCall(MatCreate(comm, &Gmat)); 7758 PetscCall(MatSetType(Gmat, jtype)); 7759 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7760 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7761 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7762 PetscInt *d_nnz, *o_nnz; 7763 MatScalar *aa, val, *AA; 7764 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7765 if (isseqaij) { 7766 a = Amat; 7767 b = NULL; 7768 } else { 7769 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7770 a = d->A; 7771 b = d->B; 7772 } 7773 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7774 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7775 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7776 PetscInt *nnz = (c == a) ?
d_nnz : o_nnz; 7777 const PetscInt *cols1, *cols2; 7778 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7779 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7780 nnz[brow / bs] = nc2 / bs; 7781 if (nc2 % bs) ok = 0; 7782 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7783 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7784 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7785 if (nc1 != nc2) ok = 0; 7786 else { 7787 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7788 if (cols1[jj] != cols2[jj]) ok = 0; 7789 if (cols1[jj] % bs != jj % bs) ok = 0; 7790 } 7791 } 7792 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7793 } 7794 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7795 if (!ok) { 7796 PetscCall(PetscFree2(d_nnz, o_nnz)); 7797 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7798 goto old_bs; 7799 } 7800 } 7801 } 7802 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7803 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7804 PetscCall(PetscFree2(d_nnz, o_nnz)); 7805 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7806 // diag 7807 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7808 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7809 ai = aseq->i; 7810 n = ai[brow + 1] - ai[brow]; 7811 aj = aseq->j + ai[brow]; 7812 for (int k = 0; k < n; k += bs) { // block columns 7813 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7814 val = 0; 7815 for (int ii = 0; ii < bs; ii++) { // rows in block 7816 aa = aseq->a + ai[brow + ii] + k; 7817 for (int jj = 0; jj < bs; jj++) { // columns in block 7818 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7819 } 7820 } 7821 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7822 AA[k / bs] = val; 7823 } 7824 grow = Istart / bs + brow / bs; 7825 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7826 } 7827 // off-diag 7828 if (ismpiaij) { 7829 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7830 const PetscScalar *vals; 7831 const PetscInt *cols, *garray = aij->garray; 7832 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7833 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7834 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7835 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7836 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7837 AA[k / bs] = 0; 7838 AJ[cidx] = garray[cols[k]] / bs; 7839 } 7840 nc = ncols / bs; 7841 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7842 for (int ii = 0; ii < bs; ii++) { // rows in block 7843 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7844 for (int k = 0; k < ncols; k += bs) { 7845 for (int jj = 0; jj < bs; jj++) { // cols in block 7846 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7847 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7848 } 7849 } 7850 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7851 } 7852 grow = Istart / bs + brow / bs; 7853 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7854 } 7855 } 7856 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7857 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7858 PetscCall(PetscFree2(AA, AJ)); 7859 } else { 7860 const PetscScalar *vals; 7861 const PetscInt *idx; 7862 PetscInt *d_nnz, 
*o_nnz, *w0, *w1, *w2; 7863 old_bs: 7864 /* 7865 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7866 */ 7867 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7868 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7869 if (isseqaij) { 7870 PetscInt max_d_nnz; 7871 /* 7872 Determine exact preallocation count for (sequential) scalar matrix 7873 */ 7874 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7875 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7876 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7877 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7878 PetscCall(PetscFree3(w0, w1, w2)); 7879 } else if (ismpiaij) { 7880 Mat Daij, Oaij; 7881 const PetscInt *garray; 7882 PetscInt max_d_nnz; 7883 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7884 /* 7885 Determine exact preallocation count for diagonal block portion of scalar matrix 7886 */ 7887 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7888 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7889 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7890 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7891 PetscCall(PetscFree3(w0, w1, w2)); 7892 /* 7893 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7894 */ 7895 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7896 o_nnz[jj] = 0; 7897 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7898 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7899 o_nnz[jj] += ncols; 7900 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7901 } 7902 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7903 } 7904 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7905 /* get scalar copy (norms) of matrix */ 7906 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7907 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7908 PetscCall(PetscFree2(d_nnz, o_nnz)); 7909 for (Ii = Istart; Ii < Iend; Ii++) { 7910 PetscInt dest_row = Ii / bs; 7911 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7912 for (jj = 0; jj < ncols; jj++) { 7913 PetscInt dest_col = idx[jj] / bs; 7914 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7915 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7916 } 7917 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7918 } 7919 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7920 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7921 } 7922 } else { 7923 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7924 else { 7925 Gmat = Amat; 7926 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7927 } 7928 if (isseqaij) { 7929 a = Gmat; 7930 b = NULL; 7931 } else { 7932 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7933 a = d->A; 7934 b = d->B; 7935 } 7936 if (filter >= 0 || scale) { 7937 /* take absolute value of each entry */ 7938 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7939 MatInfo info; 7940 PetscScalar *avals; 7941 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7942 PetscCall(MatSeqAIJGetArray(c, &avals)); 7943 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7944 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7945 } 7946 } 7947 } 7948 if 
(symmetrize) { 7949 PetscBool isset, issym; 7950 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7951 if (!isset || !issym) { 7952 Mat matTrans; 7953 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7954 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7955 PetscCall(MatDestroy(&matTrans)); 7956 } 7957 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7958 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7959 if (scale) { 7960 /* scale c for all diagonal values = 1 or -1 */ 7961 Vec diag; 7962 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7963 PetscCall(MatGetDiagonal(Gmat, diag)); 7964 PetscCall(VecReciprocal(diag)); 7965 PetscCall(VecSqrtAbs(diag)); 7966 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7967 PetscCall(VecDestroy(&diag)); 7968 } 7969 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7970 7971 if (filter >= 0) { 7972 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 7973 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 7974 } 7975 *a_Gmat = Gmat; 7976 PetscFunctionReturn(PETSC_SUCCESS); 7977 } 7978 7979 /* 7980 Special version for direct calls from Fortran 7981 */ 7982 #include <petsc/private/fortranimpl.h> 7983 7984 /* Change these macros so can be used in void function */ 7985 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7986 #undef PetscCall 7987 #define PetscCall(...) \ 7988 do { \ 7989 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7990 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7991 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 7992 return; \ 7993 } \ 7994 } while (0) 7995 7996 #undef SETERRQ 7997 #define SETERRQ(comm, ierr, ...) \ 7998 do { \ 7999 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8000 return; \ 8001 } while (0) 8002 8003 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8004 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8005 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8006 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8007 #else 8008 #endif 8009 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8010 { 8011 Mat mat = *mmat; 8012 PetscInt m = *mm, n = *mn; 8013 InsertMode addv = *maddv; 8014 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8015 PetscScalar value; 8016 8017 MatCheckPreallocated(mat, 1); 8018 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8019 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8020 { 8021 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8022 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8023 PetscBool roworiented = aij->roworiented; 8024 8025 /* Some Variables required in the macro */ 8026 Mat A = aij->A; 8027 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8028 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8029 MatScalar *aa; 8030 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 8031 Mat B = aij->B; 8032 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8033 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8034 MatScalar *ba; 8035 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8036 * cannot use "#if defined" inside a macro. */ 8037 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8038 8039 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8040 PetscInt nonew = a->nonew; 8041 MatScalar *ap1, *ap2; 8042 8043 PetscFunctionBegin; 8044 PetscCall(MatSeqAIJGetArray(A, &aa)); 8045 PetscCall(MatSeqAIJGetArray(B, &ba)); 8046 for (i = 0; i < m; i++) { 8047 if (im[i] < 0) continue; 8048 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8049 if (im[i] >= rstart && im[i] < rend) { 8050 row = im[i] - rstart; 8051 lastcol1 = -1; 8052 rp1 = aj + ai[row]; 8053 ap1 = aa + ai[row]; 8054 rmax1 = aimax[row]; 8055 nrow1 = ailen[row]; 8056 low1 = 0; 8057 high1 = nrow1; 8058 lastcol2 = -1; 8059 rp2 = bj + bi[row]; 8060 ap2 = ba + bi[row]; 8061 rmax2 = bimax[row]; 8062 nrow2 = bilen[row]; 8063 low2 = 0; 8064 high2 = nrow2; 8065 8066 for (j = 0; j < n; j++) { 8067 if (roworiented) value = v[i * n + j]; 8068 else value = v[i + j * m]; 8069 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8070 if (in[j] >= cstart && in[j] < cend) { 8071 col = in[j] - cstart; 8072 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8073 } else if (in[j] < 0) continue; 8074 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8075 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8076 } else { 8077 if (mat->was_assembled) { 8078 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8079 #if defined(PETSC_USE_CTABLE) 8080 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8081 col--; 8082 #else 8083 col = aij->colmap[in[j]] - 1; 8084 #endif 8085 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8086 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8087 col = in[j]; 8088 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8089 B = aij->B; 8090 b = (Mat_SeqAIJ *)B->data; 8091 bimax = b->imax; 8092 bi = b->i; 8093 bilen = b->ilen; 8094 bj = b->j; 8095 rp2 = bj + bi[row]; 8096 ap2 = ba + bi[row]; 8097 rmax2 = bimax[row]; 8098 nrow2 = bilen[row]; 8099 low2 = 0; 8100 high2 = nrow2; 8101 bm = aij->B->rmap->n; 8102 ba = b->a; 8103 inserted = PETSC_FALSE; 8104 } 8105 } else col = in[j]; 8106 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8107 } 8108 } 8109 } else if (!aij->donotstash) { 8110 if (roworiented) { 8111 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8112 } else { 8113 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8114 } 8115 } 8116 } 8117 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8118 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8119 } 8120 PetscFunctionReturnVoid(); 8121 } 8122 8123 /* Undefining these here since they were redefined from their original definition above! 
No 8124 * other PETSc functions should be defined past this point, as it is impossible to recover the 8125 * original definitions */ 8126 #undef PetscCall 8127 #undef SETERRQ 8128
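/*
   Illustrative usage sketch for MatCreateMPIAIJWithSplitArrays() (not part of the PETSc source; the arrays below are made up).
   It assembles the 4 x 4 matrix
       [1 0 2 0]
       [0 3 0 0]
       [0 4 5 0]
       [0 0 0 6]
   on two MPI ranks, each owning two rows and two columns. Column indices in the diagonal block are local
   (0 .. n-1), column indices in the off-diagonal block are global, and the arrays are referenced rather than
   copied, so they must outlive the matrix.

     Mat         A;
     PetscMPIInt rank;
     PetscInt    di[]  = {0, 1, 2};                 // diagonal-block row offsets (same on both ranks here)
     PetscInt    dj[]  = {0, 1};                    // local column indices of the diagonal block
     PetscScalar da0[] = {1.0, 3.0}, da1[] = {5.0, 6.0};
     PetscInt    oi[]  = {0, 1, 1};                 // one off-diagonal entry in the first local row, none in the second
     PetscInt    oj0[] = {2}, oj1[] = {1};          // global column indices of the off-diagonal block
     PetscScalar oa0[] = {2.0}, oa1[] = {4.0};

     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
     if (rank == 0) PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, di, dj, da0, oi, oj0, oa0, &A));
     else PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, di, dj, da1, oi, oj1, oa1, &A));
     // ... use A ...
     PetscCall(MatDestroy(&A));
*/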