#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`.
   The type also automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
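
/*
  Illustrative usage sketch (not part of the manual page above): calling both preallocation routines,
  as recommended there, lets the same code run on one or many MPI processes; the call that does not
  match the runtime type is ignored. The communicator comm, the local sizes m and n, and the per-row
  nonzero estimates 5 (diagonal block) and 2 (off-diagonal block) are placeholders for the example only.

    Mat A;
    PetscCall(MatCreate(comm, &A));
    PetscCall(MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/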

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore the arrays once so the host copies are current before a_aij->a and b_aij->a are read directly below */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
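  An illustrative lookup, mirroring what MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below do
  with the map (a sketch, not a public API): with PETSC_USE_CTABLE a global column gcol is translated via
      PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &col));
      col--;
  and without it via
      col = aij->colmap[gcol] - 1;
  so a resulting col < 0 means gcol does not occur in the off-diagonal part on this process.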
365 */ 366 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 367 { 368 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 369 PetscInt n = aij->B->cmap->n, i; 370 371 PetscFunctionBegin; 372 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 373 #if defined(PETSC_USE_CTABLE) 374 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 375 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 376 #else 377 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 378 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 379 #endif 380 PetscFunctionReturn(PETSC_SUCCESS); 381 } 382 383 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 384 { \ 385 if (col <= lastcol1) low1 = 0; \ 386 else high1 = nrow1; \ 387 lastcol1 = col; \ 388 while (high1 - low1 > 5) { \ 389 t = (low1 + high1) / 2; \ 390 if (rp1[t] > col) high1 = t; \ 391 else low1 = t; \ 392 } \ 393 for (_i = low1; _i < high1; _i++) { \ 394 if (rp1[_i] > col) break; \ 395 if (rp1[_i] == col) { \ 396 if (addv == ADD_VALUES) { \ 397 ap1[_i] += value; \ 398 /* Not sure LogFlops will slow dow the code or not */ \ 399 (void)PetscLogFlops(1.0); \ 400 } else ap1[_i] = value; \ 401 goto a_noinsert; \ 402 } \ 403 } \ 404 if (value == 0.0 && ignorezeroentries && row != col) { \ 405 low1 = 0; \ 406 high1 = nrow1; \ 407 goto a_noinsert; \ 408 } \ 409 if (nonew == 1) { \ 410 low1 = 0; \ 411 high1 = nrow1; \ 412 goto a_noinsert; \ 413 } \ 414 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 415 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 416 N = nrow1++ - 1; \ 417 a->nz++; \ 418 high1++; \ 419 /* shift up all the later entries in this row */ \ 420 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 421 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 422 rp1[_i] = col; \ 423 ap1[_i] = value; \ 424 A->nonzerostate++; \ 425 a_noinsert:; \ 426 ailen[row] = nrow1; \ 427 } 428 429 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 430 { \ 431 if (col <= lastcol2) low2 = 0; \ 432 else high2 = nrow2; \ 433 lastcol2 = col; \ 434 while (high2 - low2 > 5) { \ 435 t = (low2 + high2) / 2; \ 436 if (rp2[t] > col) high2 = t; \ 437 else low2 = t; \ 438 } \ 439 for (_i = low2; _i < high2; _i++) { \ 440 if (rp2[_i] > col) break; \ 441 if (rp2[_i] == col) { \ 442 if (addv == ADD_VALUES) { \ 443 ap2[_i] += value; \ 444 (void)PetscLogFlops(1.0); \ 445 } else ap2[_i] = value; \ 446 goto b_noinsert; \ 447 } \ 448 } \ 449 if (value == 0.0 && ignorezeroentries) { \ 450 low2 = 0; \ 451 high2 = nrow2; \ 452 goto b_noinsert; \ 453 } \ 454 if (nonew == 1) { \ 455 low2 = 0; \ 456 high2 = nrow2; \ 457 goto b_noinsert; \ 458 } \ 459 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 460 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 461 N = nrow2++ - 1; \ 462 b->nz++; \ 463 high2++; \ 464 /* shift up all the later entries in this row */ \ 465 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 466 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 467 rp2[_i] = col; \ 468 ap2[_i] = value; \ 469 
B->nonzerostate++; \ 470 b_noinsert:; \ 471 bilen[row] = nrow2; \ 472 } 473 474 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 475 { 476 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 477 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 478 PetscInt l, *garray = mat->garray, diag; 479 PetscScalar *aa, *ba; 480 481 PetscFunctionBegin; 482 /* code only works for square matrices A */ 483 484 /* find size of row to the left of the diagonal part */ 485 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 486 row = row - diag; 487 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 488 if (garray[b->j[b->i[row] + l]] > diag) break; 489 } 490 if (l) { 491 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 492 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 493 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 494 } 495 496 /* diagonal part */ 497 if (a->i[row + 1] - a->i[row]) { 498 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 499 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 500 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 501 } 502 503 /* right of diagonal part */ 504 if (b->i[row + 1] - b->i[row] - l) { 505 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 506 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 507 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 508 } 509 PetscFunctionReturn(PETSC_SUCCESS); 510 } 511 512 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 513 { 514 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 515 PetscScalar value = 0.0; 516 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 517 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 518 PetscBool roworiented = aij->roworiented; 519 520 /* Some Variables required in the macro */ 521 Mat A = aij->A; 522 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 523 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 524 PetscBool ignorezeroentries = a->ignorezeroentries; 525 Mat B = aij->B; 526 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 527 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 528 MatScalar *aa, *ba; 529 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 530 PetscInt nonew; 531 MatScalar *ap1, *ap2; 532 533 PetscFunctionBegin; 534 PetscCall(MatSeqAIJGetArray(A, &aa)); 535 PetscCall(MatSeqAIJGetArray(B, &ba)); 536 for (i = 0; i < m; i++) { 537 if (im[i] < 0) continue; 538 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 539 if (im[i] >= rstart && im[i] < rend) { 540 row = im[i] - rstart; 541 lastcol1 = -1; 542 rp1 = aj + ai[row]; 543 ap1 = aa + ai[row]; 544 rmax1 = aimax[row]; 545 nrow1 = ailen[row]; 546 low1 = 0; 547 high1 = nrow1; 548 lastcol2 = -1; 549 rp2 = bj + bi[row]; 550 ap2 = ba + bi[row]; 551 rmax2 = bimax[row]; 552 nrow2 = bilen[row]; 553 low2 = 0; 554 high2 = nrow2; 555 556 for (j = 0; j < n; j++) { 557 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
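  For example, on a process with cstart = 0 and cend = 2, a row with global column indices {0, 1, 3}
  is split below into diagonal-block entries {0, 1} (shifted by cstart) and the off-diagonal-block
  entry {3} (kept global at this stage), giving ailen = 2 and bilen = 1 for that row.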
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
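     They are simply the preallocated row starts a->i and b->i, which already reserve room for entries that arrive later during assembly.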
*/ 681 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 682 PetscScalar *aa = a->a, *ba = b->a; 683 684 PetscFunctionBegin; 685 /* Iterate over all rows of the matrix */ 686 for (j = 0; j < am; j++) { 687 dnz_row = onz_row = 0; 688 rowstart_offd = full_offd_i[j]; 689 rowstart_diag = full_diag_i[j]; 690 /* Iterate over all non-zero columns of the current row */ 691 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 692 /* If column is in the diagonal */ 693 if (mat_j[col] >= cstart && mat_j[col] < cend) { 694 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 695 aa[rowstart_diag + dnz_row] = mat_a[col]; 696 dnz_row++; 697 } else { /* off-diagonal entries */ 698 bj[rowstart_offd + onz_row] = mat_j[col]; 699 ba[rowstart_offd + onz_row] = mat_a[col]; 700 onz_row++; 701 } 702 } 703 ailen[j] = dnz_row; 704 bilen[j] = onz_row; 705 } 706 PetscFunctionReturn(PETSC_SUCCESS); 707 } 708 709 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 710 { 711 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 712 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 713 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 714 715 PetscFunctionBegin; 716 for (i = 0; i < m; i++) { 717 if (idxm[i] < 0) continue; /* negative row */ 718 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 719 if (idxm[i] >= rstart && idxm[i] < rend) { 720 row = idxm[i] - rstart; 721 for (j = 0; j < n; j++) { 722 if (idxn[j] < 0) continue; /* negative column */ 723 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 724 if (idxn[j] >= cstart && idxn[j] < cend) { 725 col = idxn[j] - cstart; 726 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 727 } else { 728 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 729 #if defined(PETSC_USE_CTABLE) 730 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 731 col--; 732 #else 733 col = aij->colmap[idxn[j]] - 1; 734 #endif 735 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 736 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 737 } 738 } 739 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 740 } 741 PetscFunctionReturn(PETSC_SUCCESS); 742 } 743 744 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 747 PetscInt nstash, reallocs; 748 749 PetscFunctionBegin; 750 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 751 752 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 753 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 754 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 755 PetscFunctionReturn(PETSC_SUCCESS); 756 } 757 758 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 759 { 760 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 761 PetscMPIInt n; 762 PetscInt i, j, rstart, ncols, flg; 763 PetscInt *row, *col; 764 PetscBool other_disassembled; 765 PetscScalar *val; 766 767 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 768 769 PetscFunctionBegin; 770 if 
(!aij->donotstash && !mat->nooffprocentries) { 771 while (1) { 772 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 773 if (!flg) break; 774 775 for (i = 0; i < n;) { 776 /* Now identify the consecutive vals belonging to the same row */ 777 for (j = i, rstart = row[j]; j < n; j++) { 778 if (row[j] != rstart) break; 779 } 780 if (j < n) ncols = j - i; 781 else ncols = n - i; 782 /* Now assemble all these values with a single function call */ 783 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 784 i = j; 785 } 786 } 787 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 788 } 789 #if defined(PETSC_HAVE_DEVICE) 790 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 791 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 792 if (mat->boundtocpu) { 793 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 794 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 795 } 796 #endif 797 PetscCall(MatAssemblyBegin(aij->A, mode)); 798 PetscCall(MatAssemblyEnd(aij->A, mode)); 799 800 /* determine if any processor has disassembled, if so we must 801 also disassemble ourself, in order that we may reassemble. */ 802 /* 803 if nonzero structure of submatrix B cannot change then we know that 804 no processor disassembled thus we can skip this stuff 805 */ 806 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 807 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 808 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 809 PetscCall(MatDisAssemble_MPIAIJ(mat)); 810 } 811 } 812 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 813 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 814 #if defined(PETSC_HAVE_DEVICE) 815 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 816 #endif 817 PetscCall(MatAssemblyBegin(aij->B, mode)); 818 PetscCall(MatAssemblyEnd(aij->B, mode)); 819 820 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 821 822 aij->rowvalues = NULL; 823 824 PetscCall(VecDestroy(&aij->diag)); 825 826 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 827 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 828 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 829 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 830 } 831 #if defined(PETSC_HAVE_DEVICE) 832 mat->offloadmask = PETSC_OFFLOAD_BOTH; 833 #endif 834 PetscFunctionReturn(PETSC_SUCCESS); 835 } 836 837 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 838 { 839 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 840 841 PetscFunctionBegin; 842 PetscCall(MatZeroEntries(l->A)); 843 PetscCall(MatZeroEntries(l->B)); 844 PetscFunctionReturn(PETSC_SUCCESS); 845 } 846 847 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 848 { 849 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 850 PetscObjectState sA, sB; 851 PetscInt *lrows; 852 PetscInt r, len; 853 PetscBool cong, lch, gch; 854 855 PetscFunctionBegin; 856 /* get locally owned rows */ 857 
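/* Overall flow: map the global row indices to locally owned rows; if both x and b are given, set
   b[r] = diag * x[r] on those rows; zero the corresponding rows of the diagonal block A and the
   off-diagonal block B; finally put `diag` on the diagonal, either directly via MatZeroRows() on
   the diagonal block (square matrix with congruent layouts) or via MatSetValues() otherwise. */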
PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 858 PetscCall(MatHasCongruentLayouts(A, &cong)); 859 /* fix right hand side if needed */ 860 if (x && b) { 861 const PetscScalar *xx; 862 PetscScalar *bb; 863 864 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 865 PetscCall(VecGetArrayRead(x, &xx)); 866 PetscCall(VecGetArray(b, &bb)); 867 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 868 PetscCall(VecRestoreArrayRead(x, &xx)); 869 PetscCall(VecRestoreArray(b, &bb)); 870 } 871 872 sA = mat->A->nonzerostate; 873 sB = mat->B->nonzerostate; 874 875 if (diag != 0.0 && cong) { 876 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 877 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 878 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 879 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 880 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 881 PetscInt nnwA, nnwB; 882 PetscBool nnzA, nnzB; 883 884 nnwA = aijA->nonew; 885 nnwB = aijB->nonew; 886 nnzA = aijA->keepnonzeropattern; 887 nnzB = aijB->keepnonzeropattern; 888 if (!nnzA) { 889 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 890 aijA->nonew = 0; 891 } 892 if (!nnzB) { 893 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 894 aijB->nonew = 0; 895 } 896 /* Must zero here before the next loop */ 897 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 898 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 899 for (r = 0; r < len; ++r) { 900 const PetscInt row = lrows[r] + A->rmap->rstart; 901 if (row >= A->cmap->N) continue; 902 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 903 } 904 aijA->nonew = nnwA; 905 aijB->nonew = nnwB; 906 } else { 907 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 908 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 909 } 910 PetscCall(PetscFree(lrows)); 911 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 912 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 913 914 /* reduce nonzerostate */ 915 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 916 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 917 if (gch) A->nonzerostate++; 918 PetscFunctionReturn(PETSC_SUCCESS); 919 } 920 921 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 922 { 923 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 924 PetscMPIInt n = A->rmap->n; 925 PetscInt i, j, r, m, len = 0; 926 PetscInt *lrows, *owners = A->rmap->range; 927 PetscMPIInt p = 0; 928 PetscSFNode *rrows; 929 PetscSF sf; 930 const PetscScalar *xx; 931 PetscScalar *bb, *mask, *aij_a; 932 Vec xmask, lmask; 933 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 934 const PetscInt *aj, *ii, *ridx; 935 PetscScalar *aa; 936 937 PetscFunctionBegin; 938 /* Create SF where leaves are input rows and roots are owned rows */ 939 PetscCall(PetscMalloc1(n, &lrows)); 940 for (r = 0; r < n; ++r) lrows[r] = -1; 941 PetscCall(PetscMalloc1(N, &rrows)); 942 for (r = 0; r < N; ++r) { 943 const PetscInt idx = rows[r]; 944 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range 
[0,%" PetscInt_FMT ")", idx, A->rmap->N); 945 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 946 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 947 } 948 rrows[r].rank = p; 949 rrows[r].index = rows[r] - owners[p]; 950 } 951 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 952 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 953 /* Collect flags for rows to be zeroed */ 954 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 955 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 956 PetscCall(PetscSFDestroy(&sf)); 957 /* Compress and put in row numbers */ 958 for (r = 0; r < n; ++r) 959 if (lrows[r] >= 0) lrows[len++] = r; 960 /* zero diagonal part of matrix */ 961 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 962 /* handle off diagonal part of matrix */ 963 PetscCall(MatCreateVecs(A, &xmask, NULL)); 964 PetscCall(VecDuplicate(l->lvec, &lmask)); 965 PetscCall(VecGetArray(xmask, &bb)); 966 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 967 PetscCall(VecRestoreArray(xmask, &bb)); 968 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 969 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecDestroy(&xmask)); 971 if (x && b) { /* this code is buggy when the row and column layout don't match */ 972 PetscBool cong; 973 974 PetscCall(MatHasCongruentLayouts(A, &cong)); 975 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 976 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 977 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 978 PetscCall(VecGetArrayRead(l->lvec, &xx)); 979 PetscCall(VecGetArray(b, &bb)); 980 } 981 PetscCall(VecGetArray(lmask, &mask)); 982 /* remove zeroed rows of off diagonal matrix */ 983 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 984 ii = aij->i; 985 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); 986 /* loop over all elements of off process part of matrix zeroing removed columns*/ 987 if (aij->compressedrow.use) { 988 m = aij->compressedrow.nrows; 989 ii = aij->compressedrow.i; 990 ridx = aij->compressedrow.rindex; 991 for (i = 0; i < m; i++) { 992 n = ii[i + 1] - ii[i]; 993 aj = aij->j + ii[i]; 994 aa = aij_a + ii[i]; 995 996 for (j = 0; j < n; j++) { 997 if (PetscAbsScalar(mask[*aj])) { 998 if (b) bb[*ridx] -= *aa * xx[*aj]; 999 *aa = 0.0; 1000 } 1001 aa++; 1002 aj++; 1003 } 1004 ridx++; 1005 } 1006 } else { /* do not use compressed row format */ 1007 m = l->B->rmap->n; 1008 for (i = 0; i < m; i++) { 1009 n = ii[i + 1] - ii[i]; 1010 aj = aij->j + ii[i]; 1011 aa = aij_a + ii[i]; 1012 for (j = 0; j < n; j++) { 1013 if (PetscAbsScalar(mask[*aj])) { 1014 if (b) bb[i] -= *aa * xx[*aj]; 1015 *aa = 0.0; 1016 } 1017 aa++; 1018 aj++; 1019 } 1020 } 1021 } 1022 if (x && b) { 1023 PetscCall(VecRestoreArray(b, &bb)); 1024 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1025 } 1026 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1027 PetscCall(VecRestoreArray(lmask, &mask)); 1028 PetscCall(VecDestroy(&lmask)); 1029 PetscCall(PetscFree(lrows)); 1030 1031 /* only change matrix nonzero state if pattern was allowed to be changed */ 1032 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1033 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1034 
PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1035 } 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } 1038 1039 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1040 { 1041 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1042 PetscInt nt; 1043 VecScatter Mvctx = a->Mvctx; 1044 1045 PetscFunctionBegin; 1046 PetscCall(VecGetLocalSize(xx, &nt)); 1047 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1048 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1049 PetscUseTypeMethod(a->A, mult, xx, yy); 1050 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1051 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1058 1059 PetscFunctionBegin; 1060 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1061 PetscFunctionReturn(PETSC_SUCCESS); 1062 } 1063 1064 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1067 VecScatter Mvctx = a->Mvctx; 1068 1069 PetscFunctionBegin; 1070 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1071 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1072 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1073 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 1081 PetscFunctionBegin; 1082 /* do nondiagonal part */ 1083 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1084 /* do local part */ 1085 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1086 /* add partial results together */ 1087 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1088 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1093 { 1094 MPI_Comm comm; 1095 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1096 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1097 IS Me, Notme; 1098 PetscInt M, N, first, last, *notme, i; 1099 PetscBool lf; 1100 PetscMPIInt size; 1101 1102 PetscFunctionBegin; 1103 /* Easy test: symmetric diagonal block */ 1104 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1105 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1106 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1107 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1108 PetscCallMPI(MPI_Comm_size(comm, &size)); 1109 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1110 1111 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
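   Each process extracts its rows of Amat restricted to the columns it does not own (the block
   A(Me, Notme)) and the corresponding block B(Notme, Me) of Bmat, and the two sequential blocks
   are then compared with MatIsTranspose() against each other.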
*/ 1112 PetscCall(MatGetSize(Amat, &M, &N)); 1113 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1114 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1115 for (i = 0; i < first; i++) notme[i] = i; 1116 for (i = last; i < M; i++) notme[i - last + first] = i; 1117 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1118 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1119 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1120 Aoff = Aoffs[0]; 1121 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1122 Boff = Boffs[0]; 1123 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1124 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1125 PetscCall(MatDestroyMatrices(1, &Boffs)); 1126 PetscCall(ISDestroy(&Me)); 1127 PetscCall(ISDestroy(&Notme)); 1128 PetscCall(PetscFree(notme)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131 1132 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1133 { 1134 PetscFunctionBegin; 1135 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1136 PetscFunctionReturn(PETSC_SUCCESS); 1137 } 1138 1139 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1140 { 1141 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1142 1143 PetscFunctionBegin; 1144 /* do nondiagonal part */ 1145 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1146 /* do local part */ 1147 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1148 /* add partial results together */ 1149 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1150 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1151 PetscFunctionReturn(PETSC_SUCCESS); 1152 } 1153 1154 /* 1155 This only works correctly for square matrices where the subblock A->A is the 1156 diagonal block 1157 */ 1158 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1159 { 1160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1161 1162 PetscFunctionBegin; 1163 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1164 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1165 PetscCall(MatGetDiagonal(a->A, v)); 1166 PetscFunctionReturn(PETSC_SUCCESS); 1167 } 1168 1169 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1170 { 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1172 1173 PetscFunctionBegin; 1174 PetscCall(MatScale(a->A, aa)); 1175 PetscCall(MatScale(a->B, aa)); 1176 PetscFunctionReturn(PETSC_SUCCESS); 1177 } 1178 1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1180 { 1181 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1182 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1183 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1184 const PetscInt *garray = aij->garray; 1185 const PetscScalar *aa, *ba; 1186 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1187 PetscInt64 nz, hnz; 1188 PetscInt *rowlens; 1189 PetscInt *colidxs; 1190 PetscScalar *matvals; 1191 PetscMPIInt rank; 1192 1193 PetscFunctionBegin; 1194 PetscCall(PetscViewerSetUp(viewer)); 1195 1196 M = mat->rmap->N; 1197 N = mat->cmap->N; 1198 m = mat->rmap->n; 1199 rs = mat->rmap->rstart; 1200 cs = mat->cmap->rstart; 1201 nz = A->nz + B->nz; 1202 1203 /* write matrix header */ 1204 header[0] = MAT_FILE_CLASSID; 1205 header[1] = M; 1206 header[2] = N; 1207 
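/* File layout written below: a 4-entry header [MAT_FILE_CLASSID, M, N, global nonzero count],
   the row lengths of all global rows, all column indices (each local row emitted in ascending
   global column order: off-diagonal entries left of the diagonal block, then the diagonal block,
   then the remaining off-diagonal entries), and finally the nonzero values in the same order. */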
PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1208 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1209 if (rank == 0) { 1210 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1211 else header[3] = (PetscInt)hnz; 1212 } 1213 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1214 1215 /* fill in and store row lengths */ 1216 PetscCall(PetscMalloc1(m, &rowlens)); 1217 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1219 PetscCall(PetscFree(rowlens)); 1220 1221 /* fill in and store column indices */ 1222 PetscCall(PetscMalloc1(nz, &colidxs)); 1223 for (cnt = 0, i = 0; i < m; i++) { 1224 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1225 if (garray[B->j[jb]] > cs) break; 1226 colidxs[cnt++] = garray[B->j[jb]]; 1227 } 1228 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1229 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1232 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1233 PetscCall(PetscFree(colidxs)); 1234 1235 /* fill in and store nonzero values */ 1236 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1237 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1238 PetscCall(PetscMalloc1(nz, &matvals)); 1239 for (cnt = 0, i = 0; i < m; i++) { 1240 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1241 if (garray[B->j[jb]] > cs) break; 1242 matvals[cnt++] = ba[jb]; 1243 } 1244 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1245 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1246 } 1247 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1248 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1249 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1250 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1251 PetscCall(PetscFree(matvals)); 1252 1253 /* write block size option to the viewer's .info file */ 1254 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1255 PetscFunctionReturn(PETSC_SUCCESS); 1256 } 1257 1258 #include <petscdraw.h> 1259 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1260 { 1261 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1262 PetscMPIInt rank = aij->rank, size = aij->size; 1263 PetscBool isdraw, iascii, isbinary; 1264 PetscViewer sviewer; 1265 PetscViewerFormat format; 1266 1267 PetscFunctionBegin; 1268 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1269 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1270 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1271 if (iascii) { 1272 PetscCall(PetscViewerGetFormat(viewer, &format)); 1273 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1274 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1275 PetscCall(PetscMalloc1(size, &nz)); 1276 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1277 for (i = 0; i < (PetscInt)size; i++) { 1278 nmax = 
PetscMax(nmax, nz[i]); 1279 nmin = PetscMin(nmin, nz[i]); 1280 navg += nz[i]; 1281 } 1282 PetscCall(PetscFree(nz)); 1283 navg = navg / size; 1284 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1285 PetscFunctionReturn(PETSC_SUCCESS); 1286 } 1287 PetscCall(PetscViewerGetFormat(viewer, &format)); 1288 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1289 MatInfo info; 1290 PetscInt *inodes = NULL; 1291 1292 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1293 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1294 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1295 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1296 if (!inodes) { 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1298 (double)info.memory)); 1299 } else { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 (double)info.memory)); 1302 } 1303 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1304 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1305 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1307 PetscCall(PetscViewerFlush(viewer)); 1308 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1309 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1310 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1311 PetscFunctionReturn(PETSC_SUCCESS); 1312 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1313 PetscInt inodecount, inodelimit, *inodes; 1314 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1315 if (inodes) { 1316 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1317 } else { 1318 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1319 } 1320 PetscFunctionReturn(PETSC_SUCCESS); 1321 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } 1324 } else if (isbinary) { 1325 if (size == 1) { 1326 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1327 PetscCall(MatView(aij->A, viewer)); 1328 } else { 1329 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1330 } 1331 PetscFunctionReturn(PETSC_SUCCESS); 1332 } else if (iascii && size == 1) { 1333 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1334 PetscCall(MatView(aij->A, viewer)); 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } else if (isdraw) { 1337 PetscDraw draw; 1338 PetscBool isnull; 1339 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1340 PetscCall(PetscDrawIsNull(draw, &isnull)); 1341 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1342 } 1343 1344 { /* assemble the entire matrix onto first processor */ 1345 Mat A = NULL, Av; 1346 IS isrow, iscol; 1347 1348 
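    /* Rank 0 requests all rows and columns (stride index sets of the full global sizes) while the
       other ranks request none, so the MatCreateSubMatrix() call below gathers the entire matrix
       onto the first process; all ranks still participate in the collective calls. */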
PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1349 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1350 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1351 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1352 /* The commented code uses MatCreateSubMatrices instead */ 1353 /* 1354 Mat *AA, A = NULL, Av; 1355 IS isrow,iscol; 1356 1357 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1358 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1359 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1360 if (rank == 0) { 1361 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1362 A = AA[0]; 1363 Av = AA[0]; 1364 } 1365 PetscCall(MatDestroySubMatrices(1,&AA)); 1366 */ 1367 PetscCall(ISDestroy(&iscol)); 1368 PetscCall(ISDestroy(&isrow)); 1369 /* 1370 Everyone has to call to draw the matrix since the graphics waits are 1371 synchronized across all processors that share the PetscDraw object 1372 */ 1373 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1374 if (rank == 0) { 1375 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1376 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1377 } 1378 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1379 PetscCall(PetscViewerFlush(viewer)); 1380 PetscCall(MatDestroy(&A)); 1381 } 1382 PetscFunctionReturn(PETSC_SUCCESS); 1383 } 1384 1385 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1386 { 1387 PetscBool iascii, isdraw, issocket, isbinary; 1388 1389 PetscFunctionBegin; 1390 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1391 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1394 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1395 PetscFunctionReturn(PETSC_SUCCESS); 1396 } 1397 1398 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1399 { 1400 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1401 Vec bb1 = NULL; 1402 PetscBool hasop; 1403 1404 PetscFunctionBegin; 1405 if (flag == SOR_APPLY_UPPER) { 1406 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1411 1412 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1413 if (flag & SOR_ZERO_INITIAL_GUESS) { 1414 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1415 its--; 1416 } 1417 1418 while (its--) { 1419 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1420 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 1422 /* update rhs: bb1 = bb - B*x */ 1423 PetscCall(VecScale(mat->lvec, -1.0)); 1424 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1425 1426 /* local sweep */ 1427 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, 
SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1428 } 1429 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1430 if (flag & SOR_ZERO_INITIAL_GUESS) { 1431 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1432 its--; 1433 } 1434 while (its--) { 1435 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1436 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 1438 /* update rhs: bb1 = bb - B*x */ 1439 PetscCall(VecScale(mat->lvec, -1.0)); 1440 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1441 1442 /* local sweep */ 1443 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1444 } 1445 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1446 if (flag & SOR_ZERO_INITIAL_GUESS) { 1447 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1448 its--; 1449 } 1450 while (its--) { 1451 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1452 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 1454 /* update rhs: bb1 = bb - B*x */ 1455 PetscCall(VecScale(mat->lvec, -1.0)); 1456 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1457 1458 /* local sweep */ 1459 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1460 } 1461 } else if (flag & SOR_EISENSTAT) { 1462 Vec xx1; 1463 1464 PetscCall(VecDuplicate(bb, &xx1)); 1465 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1466 1467 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1468 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1469 if (!mat->diag) { 1470 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1471 PetscCall(MatGetDiagonal(matin, mat->diag)); 1472 } 1473 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1474 if (hasop) { 1475 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1476 } else { 1477 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1478 } 1479 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1480 1481 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1482 1483 /* local sweep */ 1484 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1485 PetscCall(VecAXPY(xx, 1.0, xx1)); 1486 PetscCall(VecDestroy(&xx1)); 1487 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1488 1489 PetscCall(VecDestroy(&bb1)); 1490 1491 matin->factorerrortype = mat->A->factorerrortype; 1492 PetscFunctionReturn(PETSC_SUCCESS); 1493 } 1494 1495 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1496 { 1497 Mat aA, aB, Aperm; 1498 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1499 PetscScalar *aa, *ba; 1500 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1501 PetscSF rowsf, sf; 1502 IS parcolp = NULL; 1503 PetscBool done; 1504 1505 PetscFunctionBegin; 1506 PetscCall(MatGetLocalSize(A, &m, &n)); 1507 PetscCall(ISGetIndices(rowp, &rwant)); 1508 PetscCall(ISGetIndices(colp, &cwant)); 1509 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1510 1511 /* Invert row permutation to find out where my rows should go */ 1512 
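  /*
     rwant[i], taken from rowp, is the global row of A that should appear as global row
     rstart + i of the permuted matrix. PetscSFSetGraphLayout() below builds a star forest whose
     local leaf i points at the owner of row rwant[i] in A->rmap, so reducing work[i] = rstart + i
     into rdest with MPI_REPLACE delivers the destination row number to the owner of each source
     row; afterwards rdest[i] is the global row of the permuted matrix where local row i of A
     must be inserted.

     Small illustration (assumed layout, 2 ranks with 2 rows each, global permutation
     rowp = {2,3,0,1}): rank 0 has rwant = {2,3} and rank 1 has rwant = {0,1}; after the
     reduction rank 0 holds rdest = {2,3} and rank 1 holds rdest = {0,1}, i.e. rows 0,1 move
     to rows 2,3 and rows 2,3 move to rows 0,1.
  */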
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1513 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1514 PetscCall(PetscSFSetFromOptions(rowsf)); 1515 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1516 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1517 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1518 1519 /* Invert column permutation to find out where my columns should go */ 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1524 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1525 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1526 PetscCall(PetscSFDestroy(&sf)); 1527 1528 PetscCall(ISRestoreIndices(rowp, &rwant)); 1529 PetscCall(ISRestoreIndices(colp, &cwant)); 1530 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1531 1532 /* Find out where my gcols should go */ 1533 PetscCall(MatGetSize(aB, NULL, &ng)); 1534 PetscCall(PetscMalloc1(ng, &gcdest)); 1535 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1536 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1537 PetscCall(PetscSFSetFromOptions(sf)); 1538 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1539 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1540 PetscCall(PetscSFDestroy(&sf)); 1541 1542 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1543 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1544 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1545 for (i = 0; i < m; i++) { 1546 PetscInt row = rdest[i]; 1547 PetscMPIInt rowner; 1548 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1549 for (j = ai[i]; j < ai[i + 1]; j++) { 1550 PetscInt col = cdest[aj[j]]; 1551 PetscMPIInt cowner; 1552 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1553 if (rowner == cowner) dnnz[i]++; 1554 else onnz[i]++; 1555 } 1556 for (j = bi[i]; j < bi[i + 1]; j++) { 1557 PetscInt col = gcdest[bj[j]]; 1558 PetscMPIInt cowner; 1559 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1560 if (rowner == cowner) dnnz[i]++; 1561 else onnz[i]++; 1562 } 1563 } 1564 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1565 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1568 PetscCall(PetscSFDestroy(&rowsf)); 1569 1570 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1571 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1572 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1573 for (i = 0; i < m; i++) { 1574 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1575 PetscInt j0, rowlen; 1576 rowlen = ai[i + 1] - ai[i]; 1577 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1578 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1579 
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1580 } 1581 rowlen = bi[i + 1] - bi[i]; 1582 for (j0 = j = 0; j < rowlen; j0 = j) { 1583 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1584 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1585 } 1586 } 1587 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1588 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1589 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1590 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1591 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1592 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1593 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1594 PetscCall(PetscFree3(work, rdest, cdest)); 1595 PetscCall(PetscFree(gcdest)); 1596 if (parcolp) PetscCall(ISDestroy(&colp)); 1597 *B = Aperm; 1598 PetscFunctionReturn(PETSC_SUCCESS); 1599 } 1600 1601 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1602 { 1603 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1604 1605 PetscFunctionBegin; 1606 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1607 if (ghosts) *ghosts = aij->garray; 1608 PetscFunctionReturn(PETSC_SUCCESS); 1609 } 1610 1611 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1612 { 1613 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1614 Mat A = mat->A, B = mat->B; 1615 PetscLogDouble isend[5], irecv[5]; 1616 1617 PetscFunctionBegin; 1618 info->block_size = 1.0; 1619 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1620 1621 isend[0] = info->nz_used; 1622 isend[1] = info->nz_allocated; 1623 isend[2] = info->nz_unneeded; 1624 isend[3] = info->memory; 1625 isend[4] = info->mallocs; 1626 1627 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1628 1629 isend[0] += info->nz_used; 1630 isend[1] += info->nz_allocated; 1631 isend[2] += info->nz_unneeded; 1632 isend[3] += info->memory; 1633 isend[4] += info->mallocs; 1634 if (flag == MAT_LOCAL) { 1635 info->nz_used = isend[0]; 1636 info->nz_allocated = isend[1]; 1637 info->nz_unneeded = isend[2]; 1638 info->memory = isend[3]; 1639 info->mallocs = isend[4]; 1640 } else if (flag == MAT_GLOBAL_MAX) { 1641 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1642 1643 info->nz_used = irecv[0]; 1644 info->nz_allocated = irecv[1]; 1645 info->nz_unneeded = irecv[2]; 1646 info->memory = irecv[3]; 1647 info->mallocs = irecv[4]; 1648 } else if (flag == MAT_GLOBAL_SUM) { 1649 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1650 1651 info->nz_used = irecv[0]; 1652 info->nz_allocated = irecv[1]; 1653 info->nz_unneeded = irecv[2]; 1654 info->memory = irecv[3]; 1655 info->mallocs = irecv[4]; 1656 } 1657 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1658 info->fill_ratio_needed = 0; 1659 info->factor_mallocs = 0; 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1666 1667 PetscFunctionBegin; 1668 switch (op) { 1669 case MAT_NEW_NONZERO_LOCATIONS: 1670 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1671 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1672 case MAT_KEEP_NONZERO_PATTERN: 1673 case MAT_NEW_NONZERO_LOCATION_ERR: 1674 case MAT_USE_INODES: 1675 case MAT_IGNORE_ZERO_ENTRIES: 1676 case 
MAT_FORM_EXPLICIT_TRANSPOSE: 1677 MatCheckPreallocated(A, 1); 1678 PetscCall(MatSetOption(a->A, op, flg)); 1679 PetscCall(MatSetOption(a->B, op, flg)); 1680 break; 1681 case MAT_ROW_ORIENTED: 1682 MatCheckPreallocated(A, 1); 1683 a->roworiented = flg; 1684 1685 PetscCall(MatSetOption(a->A, op, flg)); 1686 PetscCall(MatSetOption(a->B, op, flg)); 1687 break; 1688 case MAT_FORCE_DIAGONAL_ENTRIES: 1689 case MAT_SORTED_FULL: 1690 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1691 break; 1692 case MAT_IGNORE_OFF_PROC_ENTRIES: 1693 a->donotstash = flg; 1694 break; 1695 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1696 case MAT_SPD: 1697 case MAT_SYMMETRIC: 1698 case MAT_STRUCTURALLY_SYMMETRIC: 1699 case MAT_HERMITIAN: 1700 case MAT_SYMMETRY_ETERNAL: 1701 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1702 case MAT_SPD_ETERNAL: 1703 /* if the diagonal matrix is square it inherits some of the properties above */ 1704 break; 1705 case MAT_SUBMAT_SINGLEIS: 1706 A->submat_singleis = flg; 1707 break; 1708 case MAT_STRUCTURE_ONLY: 1709 /* The option is handled directly by MatSetOption() */ 1710 break; 1711 default: 1712 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1713 } 1714 PetscFunctionReturn(PETSC_SUCCESS); 1715 } 1716 1717 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1718 { 1719 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1720 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1721 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1722 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1723 PetscInt *cmap, *idx_p; 1724 1725 PetscFunctionBegin; 1726 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1727 mat->getrowactive = PETSC_TRUE; 1728 1729 if (!mat->rowvalues && (idx || v)) { 1730 /* 1731 allocate enough space to hold information from the longest row. 
1732 */ 1733 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1734 PetscInt max = 1, tmp; 1735 for (i = 0; i < matin->rmap->n; i++) { 1736 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1737 if (max < tmp) max = tmp; 1738 } 1739 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1740 } 1741 1742 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1743 lrow = row - rstart; 1744 1745 pvA = &vworkA; 1746 pcA = &cworkA; 1747 pvB = &vworkB; 1748 pcB = &cworkB; 1749 if (!v) { 1750 pvA = NULL; 1751 pvB = NULL; 1752 } 1753 if (!idx) { 1754 pcA = NULL; 1755 if (!v) pcB = NULL; 1756 } 1757 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1758 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1759 nztot = nzA + nzB; 1760 1761 cmap = mat->garray; 1762 if (v || idx) { 1763 if (nztot) { 1764 /* Sort by increasing column numbers, assuming A and B already sorted */ 1765 PetscInt imark = -1; 1766 if (v) { 1767 *v = v_p = mat->rowvalues; 1768 for (i = 0; i < nzB; i++) { 1769 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1770 else break; 1771 } 1772 imark = i; 1773 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1774 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1775 } 1776 if (idx) { 1777 *idx = idx_p = mat->rowindices; 1778 if (imark > -1) { 1779 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1780 } else { 1781 for (i = 0; i < nzB; i++) { 1782 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1783 else break; 1784 } 1785 imark = i; 1786 } 1787 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1788 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1789 } 1790 } else { 1791 if (idx) *idx = NULL; 1792 if (v) *v = NULL; 1793 } 1794 } 1795 *nz = nztot; 1796 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1797 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1798 PetscFunctionReturn(PETSC_SUCCESS); 1799 } 1800 1801 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1802 { 1803 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1804 1805 PetscFunctionBegin; 1806 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1807 aij->getrowactive = PETSC_FALSE; 1808 PetscFunctionReturn(PETSC_SUCCESS); 1809 } 1810 1811 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1812 { 1813 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1814 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1815 PetscInt i, j, cstart = mat->cmap->rstart; 1816 PetscReal sum = 0.0; 1817 const MatScalar *v, *amata, *bmata; 1818 1819 PetscFunctionBegin; 1820 if (aij->size == 1) { 1821 PetscCall(MatNorm(aij->A, type, norm)); 1822 } else { 1823 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1824 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1825 if (type == NORM_FROBENIUS) { 1826 v = amata; 1827 for (i = 0; i < amat->nz; i++) { 1828 sum += PetscRealPart(PetscConj(*v) * (*v)); 1829 v++; 1830 } 1831 v = bmata; 1832 for (i = 0; i < bmat->nz; i++) { 1833 sum += PetscRealPart(PetscConj(*v) * (*v)); 1834 v++; 1835 } 1836 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1837 *norm = PetscSqrtReal(*norm); 1838 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1839 } else if (type == NORM_1) { /* max column 
norm */ 1840 PetscReal *tmp, *tmp2; 1841 PetscInt *jj, *garray = aij->garray; 1842 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1843 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1844 *norm = 0.0; 1845 v = amata; 1846 jj = amat->j; 1847 for (j = 0; j < amat->nz; j++) { 1848 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1849 v++; 1850 } 1851 v = bmata; 1852 jj = bmat->j; 1853 for (j = 0; j < bmat->nz; j++) { 1854 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1855 v++; 1856 } 1857 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1858 for (j = 0; j < mat->cmap->N; j++) { 1859 if (tmp2[j] > *norm) *norm = tmp2[j]; 1860 } 1861 PetscCall(PetscFree(tmp)); 1862 PetscCall(PetscFree(tmp2)); 1863 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1864 } else if (type == NORM_INFINITY) { /* max row norm */ 1865 PetscReal ntemp = 0.0; 1866 for (j = 0; j < aij->A->rmap->n; j++) { 1867 v = amata + amat->i[j]; 1868 sum = 0.0; 1869 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1870 sum += PetscAbsScalar(*v); 1871 v++; 1872 } 1873 v = bmata + bmat->i[j]; 1874 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1875 sum += PetscAbsScalar(*v); 1876 v++; 1877 } 1878 if (sum > ntemp) ntemp = sum; 1879 } 1880 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1881 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1882 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1883 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1884 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1885 } 1886 PetscFunctionReturn(PETSC_SUCCESS); 1887 } 1888 1889 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1890 { 1891 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1892 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1893 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1894 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1895 Mat B, A_diag, *B_diag; 1896 const MatScalar *pbv, *bv; 1897 1898 PetscFunctionBegin; 1899 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1900 ma = A->rmap->n; 1901 na = A->cmap->n; 1902 mb = a->B->rmap->n; 1903 nb = a->B->cmap->n; 1904 ai = Aloc->i; 1905 aj = Aloc->j; 1906 bi = Bloc->i; 1907 bj = Bloc->j; 1908 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1909 PetscInt *d_nnz, *g_nnz, *o_nnz; 1910 PetscSFNode *oloc; 1911 PETSC_UNUSED PetscSF sf; 1912 1913 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1914 /* compute d_nnz for preallocation */ 1915 PetscCall(PetscArrayzero(d_nnz, na)); 1916 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1917 /* compute local off-diagonal contributions */ 1918 PetscCall(PetscArrayzero(g_nnz, nb)); 1919 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1920 /* map those to global */ 1921 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1922 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1923 PetscCall(PetscSFSetFromOptions(sf)); 1924 PetscCall(PetscArrayzero(o_nnz, na)); 1925 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1926 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1927 PetscCall(PetscSFDestroy(&sf)); 1928 1929 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1930 
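    /*
       B is the transpose, so its local row/column sizes below are A's local column/row sizes.
       The d_nnz computed above counts, per local column of the diagonal block, the entries that
       stay on this rank after transposition, while o_nnz was obtained by summing the
       off-diagonal column counts g_nnz onto the ranks that own those columns via the star
       forest, so together they preallocate B exactly.
    */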
PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1931 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1932 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1933 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1934 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1935 } else { 1936 B = *matout; 1937 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1938 } 1939 1940 b = (Mat_MPIAIJ *)B->data; 1941 A_diag = a->A; 1942 B_diag = &b->A; 1943 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1944 A_diag_ncol = A_diag->cmap->N; 1945 B_diag_ilen = sub_B_diag->ilen; 1946 B_diag_i = sub_B_diag->i; 1947 1948 /* Set ilen for diagonal of B */ 1949 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1950 1951 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1952 very quickly (=without using MatSetValues), because all writes are local. */ 1953 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1954 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb], &cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i = 0; i < mb; i++) { 1964 ncol = bi[i + 1] - bi[i]; 1965 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; 1968 cols_tmp += ncol; 1969 } 1970 PetscCall(PetscFree(cols)); 1971 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1972 1973 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1974 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1975 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1976 *matout = B; 1977 } else { 1978 PetscCall(MatHeaderMerge(A, &B)); 1979 } 1980 PetscFunctionReturn(PETSC_SUCCESS); 1981 } 1982 1983 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1984 { 1985 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1986 Mat a = aij->A, b = aij->B; 1987 PetscInt s1, s2, s3; 1988 1989 PetscFunctionBegin; 1990 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1991 if (rr) { 1992 PetscCall(VecGetLocalSize(rr, &s1)); 1993 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1994 /* Overlap communication with computation. 
*/ 1995 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1996 } 1997 if (ll) { 1998 PetscCall(VecGetLocalSize(ll, &s1)); 1999 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2000 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2001 } 2002 /* scale the diagonal block */ 2003 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2004 2005 if (rr) { 2006 /* Do a scatter end and then right scale the off-diagonal block */ 2007 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2008 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2009 } 2010 PetscFunctionReturn(PETSC_SUCCESS); 2011 } 2012 2013 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2014 { 2015 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2016 2017 PetscFunctionBegin; 2018 PetscCall(MatSetUnfactored(a->A)); 2019 PetscFunctionReturn(PETSC_SUCCESS); 2020 } 2021 2022 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2023 { 2024 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2025 Mat a, b, c, d; 2026 PetscBool flg; 2027 2028 PetscFunctionBegin; 2029 a = matA->A; 2030 b = matA->B; 2031 c = matB->A; 2032 d = matB->B; 2033 2034 PetscCall(MatEqual(a, c, &flg)); 2035 if (flg) PetscCall(MatEqual(b, d, &flg)); 2036 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2041 { 2042 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2043 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2044 2045 PetscFunctionBegin; 2046 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2047 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2048 /* because of the column compression in the off-processor part of the matrix a->B, 2049 the number of columns in a->B and b->B may be different, hence we cannot call 2050 the MatCopy() directly on the two parts. If need be, we can provide a more 2051 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2052 then copying the submatrices */ 2053 PetscCall(MatCopy_Basic(A, B, str)); 2054 } else { 2055 PetscCall(MatCopy(a->A, b->A, str)); 2056 PetscCall(MatCopy(a->B, b->B, str)); 2057 } 2058 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2059 PetscFunctionReturn(PETSC_SUCCESS); 2060 } 2061 2062 /* 2063 Computes the number of nonzeros per row needed for preallocation when X and Y 2064 have different nonzero structure. 
2065 */ 2066 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2067 { 2068 PetscInt i, j, k, nzx, nzy; 2069 2070 PetscFunctionBegin; 2071 /* Set the number of nonzeros in the new matrix */ 2072 for (i = 0; i < m; i++) { 2073 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2074 nzx = xi[i + 1] - xi[i]; 2075 nzy = yi[i + 1] - yi[i]; 2076 nnz[i] = 0; 2077 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2078 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2079 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2080 nnz[i]++; 2081 } 2082 for (; k < nzy; k++) nnz[i]++; 2083 } 2084 PetscFunctionReturn(PETSC_SUCCESS); 2085 } 2086 2087 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2088 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2089 { 2090 PetscInt m = Y->rmap->N; 2091 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2092 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2093 2094 PetscFunctionBegin; 2095 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2096 PetscFunctionReturn(PETSC_SUCCESS); 2097 } 2098 2099 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2100 { 2101 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2102 2103 PetscFunctionBegin; 2104 if (str == SAME_NONZERO_PATTERN) { 2105 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2106 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2107 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2108 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2109 } else { 2110 Mat B; 2111 PetscInt *nnz_d, *nnz_o; 2112 2113 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2114 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2115 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2116 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2117 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2118 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2119 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2120 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2121 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2122 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2123 PetscCall(MatHeaderMerge(Y, &B)); 2124 PetscCall(PetscFree(nnz_d)); 2125 PetscCall(PetscFree(nnz_o)); 2126 } 2127 PetscFunctionReturn(PETSC_SUCCESS); 2128 } 2129 2130 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2131 2132 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2133 { 2134 PetscFunctionBegin; 2135 if (PetscDefined(USE_COMPLEX)) { 2136 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2137 2138 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2139 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2140 } 2141 PetscFunctionReturn(PETSC_SUCCESS); 2142 } 2143 2144 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2145 { 2146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2147 2148 PetscFunctionBegin; 2149 PetscCall(MatRealPart(a->A)); 2150 PetscCall(MatRealPart(a->B)); 2151 PetscFunctionReturn(PETSC_SUCCESS); 2152 } 2153 2154 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2155 { 2156 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2157 2158 PetscFunctionBegin; 2159 PetscCall(MatImaginaryPart(a->A)); 
PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv)); /* vv was obtained from v above */
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit zero exists and the minimum absolute value is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column
number = cmap[B column number] */ 2264 if (col > j && j < cstart) { 2265 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2266 break; 2267 } else if (col > j + n && j >= cstart) { 2268 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2269 break; 2270 } 2271 } 2272 if (j == ncols && ncols < A->cmap->N - n) { 2273 /* a hole is outside compressed Bcols */ 2274 if (ncols == 0) { 2275 if (cstart) { 2276 offdiagIdx[r] = 0; 2277 } else offdiagIdx[r] = cend; 2278 } else { /* ncols > 0 */ 2279 offdiagIdx[r] = cmap[ncols - 1] + 1; 2280 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2281 } 2282 } 2283 } 2284 2285 for (j = 0; j < ncols; j++) { 2286 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2287 offdiagA[r] = *ba; 2288 offdiagIdx[r] = cmap[*bj]; 2289 } 2290 ba++; 2291 bj++; 2292 } 2293 } 2294 2295 PetscCall(VecGetArrayWrite(v, &a)); 2296 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2297 for (r = 0; r < m; ++r) { 2298 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2299 a[r] = diagA[r]; 2300 if (idx) idx[r] = cstart + diagIdx[r]; 2301 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2302 a[r] = diagA[r]; 2303 if (idx) { 2304 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2305 idx[r] = cstart + diagIdx[r]; 2306 } else idx[r] = offdiagIdx[r]; 2307 } 2308 } else { 2309 a[r] = offdiagA[r]; 2310 if (idx) idx[r] = offdiagIdx[r]; 2311 } 2312 } 2313 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2314 PetscCall(VecRestoreArrayWrite(v, &a)); 2315 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2316 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2317 PetscCall(VecDestroy(&diagV)); 2318 PetscCall(VecDestroy(&offdiagV)); 2319 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2320 PetscFunctionReturn(PETSC_SUCCESS); 2321 } 2322 2323 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2324 { 2325 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2326 PetscInt m = A->rmap->n, n = A->cmap->n; 2327 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2328 PetscInt *cmap = mat->garray; 2329 PetscInt *diagIdx, *offdiagIdx; 2330 Vec diagV, offdiagV; 2331 PetscScalar *a, *diagA, *offdiagA; 2332 const PetscScalar *ba, *bav; 2333 PetscInt r, j, col, ncols, *bi, *bj; 2334 Mat B = mat->B; 2335 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2336 2337 PetscFunctionBegin; 2338 /* When a process holds entire A and other processes have no entry */ 2339 if (A->cmap->N == n) { 2340 PetscCall(VecGetArrayWrite(v, &diagA)); 2341 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2342 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2343 PetscCall(VecDestroy(&diagV)); 2344 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } else if (n == 0) { 2347 if (m) { 2348 PetscCall(VecGetArrayWrite(v, &a)); 2349 for (r = 0; r < m; r++) { 2350 a[r] = PETSC_MAX_REAL; 2351 if (idx) idx[r] = -1; 2352 } 2353 PetscCall(VecRestoreArrayWrite(v, &a)); 2354 } 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } 2357 2358 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2359 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2360 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2361 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2362 2363 /* Get offdiagIdx[] for implicit 0.0 */ 2364 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2365 ba = bav; 2366 bi = b->i; 2367 bj = b->j; 2368 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2369 for (r = 0; r < m; r++) { 2370 
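      /*
         For each local row, first decide what the row minimum over the off-diagonal part would be
         once implicit zeros are taken into account: when the compressed row is shorter than the
         total number of off-process columns there is at least one implicit 0.0, so the running
         minimum starts at 0.0 with its column set to the first "hole" in cmap; the stored entries
         are then scanned and any smaller value (compared by real part) replaces it.
      */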
ncols = bi[r + 1] - bi[r]; 2371 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2372 offdiagA[r] = *ba; 2373 offdiagIdx[r] = cmap[0]; 2374 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2375 offdiagA[r] = 0.0; 2376 2377 /* Find first hole in the cmap */ 2378 for (j = 0; j < ncols; j++) { 2379 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2380 if (col > j && j < cstart) { 2381 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2382 break; 2383 } else if (col > j + n && j >= cstart) { 2384 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2385 break; 2386 } 2387 } 2388 if (j == ncols && ncols < A->cmap->N - n) { 2389 /* a hole is outside compressed Bcols */ 2390 if (ncols == 0) { 2391 if (cstart) { 2392 offdiagIdx[r] = 0; 2393 } else offdiagIdx[r] = cend; 2394 } else { /* ncols > 0 */ 2395 offdiagIdx[r] = cmap[ncols - 1] + 1; 2396 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2397 } 2398 } 2399 } 2400 2401 for (j = 0; j < ncols; j++) { 2402 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2403 offdiagA[r] = *ba; 2404 offdiagIdx[r] = cmap[*bj]; 2405 } 2406 ba++; 2407 bj++; 2408 } 2409 } 2410 2411 PetscCall(VecGetArrayWrite(v, &a)); 2412 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2413 for (r = 0; r < m; ++r) { 2414 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2415 a[r] = diagA[r]; 2416 if (idx) idx[r] = cstart + diagIdx[r]; 2417 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2418 a[r] = diagA[r]; 2419 if (idx) { 2420 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2421 idx[r] = cstart + diagIdx[r]; 2422 } else idx[r] = offdiagIdx[r]; 2423 } 2424 } else { 2425 a[r] = offdiagA[r]; 2426 if (idx) idx[r] = offdiagIdx[r]; 2427 } 2428 } 2429 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2430 PetscCall(VecRestoreArrayWrite(v, &a)); 2431 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2432 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2433 PetscCall(VecDestroy(&diagV)); 2434 PetscCall(VecDestroy(&offdiagV)); 2435 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2436 PetscFunctionReturn(PETSC_SUCCESS); 2437 } 2438 2439 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2440 { 2441 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2442 PetscInt m = A->rmap->n, n = A->cmap->n; 2443 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2444 PetscInt *cmap = mat->garray; 2445 PetscInt *diagIdx, *offdiagIdx; 2446 Vec diagV, offdiagV; 2447 PetscScalar *a, *diagA, *offdiagA; 2448 const PetscScalar *ba, *bav; 2449 PetscInt r, j, col, ncols, *bi, *bj; 2450 Mat B = mat->B; 2451 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2452 2453 PetscFunctionBegin; 2454 /* When a process holds entire A and other processes have no entry */ 2455 if (A->cmap->N == n) { 2456 PetscCall(VecGetArrayWrite(v, &diagA)); 2457 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2458 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2459 PetscCall(VecDestroy(&diagV)); 2460 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2461 PetscFunctionReturn(PETSC_SUCCESS); 2462 } else if (n == 0) { 2463 if (m) { 2464 PetscCall(VecGetArrayWrite(v, &a)); 2465 for (r = 0; r < m; r++) { 2466 a[r] = PETSC_MIN_REAL; 2467 if (idx) idx[r] = -1; 2468 } 2469 PetscCall(VecRestoreArrayWrite(v, &a)); 2470 } 2471 PetscFunctionReturn(PETSC_SUCCESS); 2472 } 2473 2474 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2475 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 
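  /*
     diagV and offdiagV receive the per-row maxima of the diagonal and off-diagonal blocks
     respectively; they are merged into v further below, preferring the diagonal block and,
     on ties, the smaller global column index.
  */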
2476 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2477 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2478 2479 /* Get offdiagIdx[] for implicit 0.0 */ 2480 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2481 ba = bav; 2482 bi = b->i; 2483 bj = b->j; 2484 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2485 for (r = 0; r < m; r++) { 2486 ncols = bi[r + 1] - bi[r]; 2487 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2488 offdiagA[r] = *ba; 2489 offdiagIdx[r] = cmap[0]; 2490 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2491 offdiagA[r] = 0.0; 2492 2493 /* Find first hole in the cmap */ 2494 for (j = 0; j < ncols; j++) { 2495 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2496 if (col > j && j < cstart) { 2497 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2498 break; 2499 } else if (col > j + n && j >= cstart) { 2500 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2501 break; 2502 } 2503 } 2504 if (j == ncols && ncols < A->cmap->N - n) { 2505 /* a hole is outside compressed Bcols */ 2506 if (ncols == 0) { 2507 if (cstart) { 2508 offdiagIdx[r] = 0; 2509 } else offdiagIdx[r] = cend; 2510 } else { /* ncols > 0 */ 2511 offdiagIdx[r] = cmap[ncols - 1] + 1; 2512 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2513 } 2514 } 2515 } 2516 2517 for (j = 0; j < ncols; j++) { 2518 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2519 offdiagA[r] = *ba; 2520 offdiagIdx[r] = cmap[*bj]; 2521 } 2522 ba++; 2523 bj++; 2524 } 2525 } 2526 2527 PetscCall(VecGetArrayWrite(v, &a)); 2528 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2529 for (r = 0; r < m; ++r) { 2530 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2531 a[r] = diagA[r]; 2532 if (idx) idx[r] = cstart + diagIdx[r]; 2533 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2534 a[r] = diagA[r]; 2535 if (idx) { 2536 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2537 idx[r] = cstart + diagIdx[r]; 2538 } else idx[r] = offdiagIdx[r]; 2539 } 2540 } else { 2541 a[r] = offdiagA[r]; 2542 if (idx) idx[r] = offdiagIdx[r]; 2543 } 2544 } 2545 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2546 PetscCall(VecRestoreArrayWrite(v, &a)); 2547 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2548 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2549 PetscCall(VecDestroy(&diagV)); 2550 PetscCall(VecDestroy(&offdiagV)); 2551 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2552 PetscFunctionReturn(PETSC_SUCCESS); 2553 } 2554 2555 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2556 { 2557 Mat *dummy; 2558 2559 PetscFunctionBegin; 2560 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2561 *newmat = *dummy; 2562 PetscCall(PetscFree(dummy)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2567 { 2568 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2572 A->factorerrortype = a->A->factorerrortype; 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2577 { 2578 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2579 2580 PetscFunctionBegin; 2581 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ 
is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros stored in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros stored on this MPI rank

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` to use the scalable algorithm (the default is not to use it)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation.
*/ 2671 PetscInt nonew = aij->nonew; 2672 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2673 aij->nonew = nonew; 2674 } 2675 PetscCall(MatShift_Basic(Y, a)); 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678 2679 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2680 { 2681 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2682 2683 PetscFunctionBegin; 2684 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2685 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2686 if (d) { 2687 PetscInt rstart; 2688 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2689 *d += rstart; 2690 } 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2695 { 2696 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2697 2698 PetscFunctionBegin; 2699 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2704 { 2705 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2706 2707 PetscFunctionBegin; 2708 PetscCall(MatEliminateZeros(a->A)); 2709 PetscCall(MatEliminateZeros(a->B)); 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2714 MatGetRow_MPIAIJ, 2715 MatRestoreRow_MPIAIJ, 2716 MatMult_MPIAIJ, 2717 /* 4*/ MatMultAdd_MPIAIJ, 2718 MatMultTranspose_MPIAIJ, 2719 MatMultTransposeAdd_MPIAIJ, 2720 NULL, 2721 NULL, 2722 NULL, 2723 /*10*/ NULL, 2724 NULL, 2725 NULL, 2726 MatSOR_MPIAIJ, 2727 MatTranspose_MPIAIJ, 2728 /*15*/ MatGetInfo_MPIAIJ, 2729 MatEqual_MPIAIJ, 2730 MatGetDiagonal_MPIAIJ, 2731 MatDiagonalScale_MPIAIJ, 2732 MatNorm_MPIAIJ, 2733 /*20*/ MatAssemblyBegin_MPIAIJ, 2734 MatAssemblyEnd_MPIAIJ, 2735 MatSetOption_MPIAIJ, 2736 MatZeroEntries_MPIAIJ, 2737 /*24*/ MatZeroRows_MPIAIJ, 2738 NULL, 2739 NULL, 2740 NULL, 2741 NULL, 2742 /*29*/ MatSetUp_MPI_Hash, 2743 NULL, 2744 NULL, 2745 MatGetDiagonalBlock_MPIAIJ, 2746 NULL, 2747 /*34*/ MatDuplicate_MPIAIJ, 2748 NULL, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*39*/ MatAXPY_MPIAIJ, 2753 MatCreateSubMatrices_MPIAIJ, 2754 MatIncreaseOverlap_MPIAIJ, 2755 MatGetValues_MPIAIJ, 2756 MatCopy_MPIAIJ, 2757 /*44*/ MatGetRowMax_MPIAIJ, 2758 MatScale_MPIAIJ, 2759 MatShift_MPIAIJ, 2760 MatDiagonalSet_MPIAIJ, 2761 MatZeroRowsColumns_MPIAIJ, 2762 /*49*/ MatSetRandom_MPIAIJ, 2763 MatGetRowIJ_MPIAIJ, 2764 MatRestoreRowIJ_MPIAIJ, 2765 NULL, 2766 NULL, 2767 /*54*/ MatFDColoringCreate_MPIXAIJ, 2768 NULL, 2769 MatSetUnfactored_MPIAIJ, 2770 MatPermute_MPIAIJ, 2771 NULL, 2772 /*59*/ MatCreateSubMatrix_MPIAIJ, 2773 MatDestroy_MPIAIJ, 2774 MatView_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*64*/ NULL, 2778 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2783 MatGetRowMinAbs_MPIAIJ, 2784 NULL, 2785 NULL, 2786 NULL, 2787 NULL, 2788 /*75*/ MatFDColoringApply_AIJ, 2789 MatSetFromOptions_MPIAIJ, 2790 NULL, 2791 NULL, 2792 MatFindZeroDiagonals_MPIAIJ, 2793 /*80*/ NULL, 2794 NULL, 2795 NULL, 2796 /*83*/ MatLoad_MPIAIJ, 2797 MatIsSymmetric_MPIAIJ, 2798 NULL, 2799 NULL, 2800 NULL, 2801 NULL, 2802 /*89*/ NULL, 2803 NULL, 2804 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2805 NULL, 2806 NULL, 2807 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2808 NULL, 2809 NULL, 2810 NULL, 2811 MatBindToCPU_MPIAIJ, 2812 /*99*/ MatProductSetFromOptions_MPIAIJ, 2813 NULL, 2814 NULL, 2815 MatConjugate_MPIAIJ, 2816 NULL, 
2817 /*104*/ MatSetValuesRow_MPIAIJ, 2818 MatRealPart_MPIAIJ, 2819 MatImaginaryPart_MPIAIJ, 2820 NULL, 2821 NULL, 2822 /*109*/ NULL, 2823 NULL, 2824 MatGetRowMin_MPIAIJ, 2825 NULL, 2826 MatMissingDiagonal_MPIAIJ, 2827 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2828 NULL, 2829 MatGetGhosts_MPIAIJ, 2830 NULL, 2831 NULL, 2832 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2833 NULL, 2834 NULL, 2835 NULL, 2836 MatGetMultiProcBlock_MPIAIJ, 2837 /*124*/ MatFindNonzeroRows_MPIAIJ, 2838 MatGetColumnReductions_MPIAIJ, 2839 MatInvertBlockDiagonal_MPIAIJ, 2840 MatInvertVariableBlockDiagonal_MPIAIJ, 2841 MatCreateSubMatricesMPI_MPIAIJ, 2842 /*129*/ NULL, 2843 NULL, 2844 NULL, 2845 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2846 NULL, 2847 /*134*/ NULL, 2848 NULL, 2849 NULL, 2850 NULL, 2851 NULL, 2852 /*139*/ MatSetBlockSizes_MPIAIJ, 2853 NULL, 2854 NULL, 2855 MatFDColoringSetUp_MPIXAIJ, 2856 MatFindOffBlockDiagonalEntries_MPIAIJ, 2857 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2858 /*145*/ NULL, 2859 NULL, 2860 NULL, 2861 MatCreateGraph_Simple_AIJ, 2862 NULL, 2863 /*150*/ NULL, 2864 MatEliminateZeros_MPIAIJ}; 2865 2866 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2867 { 2868 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2869 2870 PetscFunctionBegin; 2871 PetscCall(MatStoreValues(aij->A)); 2872 PetscCall(MatStoreValues(aij->B)); 2873 PetscFunctionReturn(PETSC_SUCCESS); 2874 } 2875 2876 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2877 { 2878 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2879 2880 PetscFunctionBegin; 2881 PetscCall(MatRetrieveValues(aij->A)); 2882 PetscCall(MatRetrieveValues(aij->B)); 2883 PetscFunctionReturn(PETSC_SUCCESS); 2884 } 2885 2886 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2887 { 2888 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2889 PetscMPIInt size; 2890 2891 PetscFunctionBegin; 2892 if (B->hash_active) { 2893 B->ops[0] = b->cops; 2894 B->hash_active = PETSC_FALSE; 2895 } 2896 PetscCall(PetscLayoutSetUp(B->rmap)); 2897 PetscCall(PetscLayoutSetUp(B->cmap)); 2898 2899 #if defined(PETSC_USE_CTABLE) 2900 PetscCall(PetscHMapIDestroy(&b->colmap)); 2901 #else 2902 PetscCall(PetscFree(b->colmap)); 2903 #endif 2904 PetscCall(PetscFree(b->garray)); 2905 PetscCall(VecDestroy(&b->lvec)); 2906 PetscCall(VecScatterDestroy(&b->Mvctx)); 2907 2908 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2909 PetscCall(MatDestroy(&b->B)); 2910 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2911 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2912 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2913 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2914 2915 PetscCall(MatDestroy(&b->A)); 2916 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2917 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2918 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2919 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2920 2921 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2922 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2923 B->preallocated = PETSC_TRUE; 2924 B->was_assembled = PETSC_FALSE; 2925 B->assembled = PETSC_FALSE; 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2930 { 2931 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2932 2933 PetscFunctionBegin; 2934 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2935 PetscCall(PetscLayoutSetUp(B->rmap)); 2936 PetscCall(PetscLayoutSetUp(B->cmap)); 2937 2938 #if defined(PETSC_USE_CTABLE) 2939 PetscCall(PetscHMapIDestroy(&b->colmap)); 2940 #else 2941 PetscCall(PetscFree(b->colmap)); 2942 #endif 2943 PetscCall(PetscFree(b->garray)); 2944 PetscCall(VecDestroy(&b->lvec)); 2945 PetscCall(VecScatterDestroy(&b->Mvctx)); 2946 2947 PetscCall(MatResetPreallocation(b->A)); 2948 PetscCall(MatResetPreallocation(b->B)); 2949 B->preallocated = PETSC_TRUE; 2950 B->was_assembled = PETSC_FALSE; 2951 B->assembled = PETSC_FALSE; 2952 PetscFunctionReturn(PETSC_SUCCESS); 2953 } 2954 2955 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2956 { 2957 Mat mat; 2958 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2959 2960 PetscFunctionBegin; 2961 *newmat = NULL; 2962 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2963 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2964 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2965 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2966 a = (Mat_MPIAIJ *)mat->data; 2967 2968 mat->factortype = matin->factortype; 2969 mat->assembled = matin->assembled; 2970 mat->insertmode = NOT_SET_VALUES; 2971 mat->preallocated = matin->preallocated; 2972 2973 a->size = oldmat->size; 2974 a->rank = oldmat->rank; 2975 a->donotstash = oldmat->donotstash; 2976 a->roworiented = oldmat->roworiented; 2977 a->rowindices = NULL; 2978 a->rowvalues = NULL; 2979 a->getrowactive = PETSC_FALSE; 2980 2981 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2982 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2983 2984 if (oldmat->colmap) { 2985 #if defined(PETSC_USE_CTABLE) 2986 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2987 #else 2988 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2989 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2990 #endif 2991 } else a->colmap = NULL; 2992 if (oldmat->garray) { 2993 PetscInt len; 2994 len = oldmat->B->cmap->n; 2995 PetscCall(PetscMalloc1(len + 1, &a->garray)); 2996 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2997 } else a->garray = NULL; 2998 2999 /* It may happen MatDuplicate is called with a non-assembled matrix 3000 In fact, MatDuplicate only requires the matrix to be preallocated 3001 This may happen inside a DMCreateMatrix_Shell */ 3002 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3003 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3004 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3005 PetscCall(MatDuplicate(oldmat->B, 
cpvalues, &a->B)); 3006 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3007 *newmat = mat; 3008 PetscFunctionReturn(PETSC_SUCCESS); 3009 } 3010 3011 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3012 { 3013 PetscBool isbinary, ishdf5; 3014 3015 PetscFunctionBegin; 3016 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3017 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3018 /* force binary viewer to load .info file if it has not yet done so */ 3019 PetscCall(PetscViewerSetUp(viewer)); 3020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3021 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3022 if (isbinary) { 3023 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3024 } else if (ishdf5) { 3025 #if defined(PETSC_HAVE_HDF5) 3026 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3027 #else 3028 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3029 #endif 3030 } else { 3031 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3032 } 3033 PetscFunctionReturn(PETSC_SUCCESS); 3034 } 3035 3036 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3037 { 3038 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3039 PetscInt *rowidxs, *colidxs; 3040 PetscScalar *matvals; 3041 3042 PetscFunctionBegin; 3043 PetscCall(PetscViewerSetUp(viewer)); 3044 3045 /* read in matrix header */ 3046 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3047 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3048 M = header[1]; 3049 N = header[2]; 3050 nz = header[3]; 3051 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3052 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3053 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3054 3055 /* set block sizes from the viewer's .info file */ 3056 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3057 /* set global sizes if not set already */ 3058 if (mat->rmap->N < 0) mat->rmap->N = M; 3059 if (mat->cmap->N < 0) mat->cmap->N = N; 3060 PetscCall(PetscLayoutSetUp(mat->rmap)); 3061 PetscCall(PetscLayoutSetUp(mat->cmap)); 3062 3063 /* check if the matrix sizes are correct */ 3064 PetscCall(MatGetSize(mat, &rows, &cols)); 3065 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3066 3067 /* read in row lengths and build row indices */ 3068 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3069 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3070 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3071 rowidxs[0] = 0; 3072 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3073 if (nz != PETSC_MAX_INT) { 3074 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 
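    /* When the header records an actual nonzero count (nz != PETSC_MAX_INT), the sum of all
       per-rank row lengths just computed must match it; a mismatch indicates a corrupt or
       inconsistently written file, which the check below reports. */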
3075 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3076 } 3077 3078 /* read in column indices and matrix values */ 3079 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3080 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3081 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3082 /* store matrix indices and values */ 3083 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3084 PetscCall(PetscFree(rowidxs)); 3085 PetscCall(PetscFree2(colidxs, matvals)); 3086 PetscFunctionReturn(PETSC_SUCCESS); 3087 } 3088 3089 /* Not scalable because of ISAllGather() unless getting all columns. */ 3090 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3091 { 3092 IS iscol_local; 3093 PetscBool isstride; 3094 PetscMPIInt lisstride = 0, gisstride; 3095 3096 PetscFunctionBegin; 3097 /* check if we are grabbing all columns*/ 3098 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3099 3100 if (isstride) { 3101 PetscInt start, len, mstart, mlen; 3102 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3103 PetscCall(ISGetLocalSize(iscol, &len)); 3104 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3105 if (mstart == start && mlen - mstart == len) lisstride = 1; 3106 } 3107 3108 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3109 if (gisstride) { 3110 PetscInt N; 3111 PetscCall(MatGetSize(mat, NULL, &N)); 3112 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3113 PetscCall(ISSetIdentity(iscol_local)); 3114 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3115 } else { 3116 PetscInt cbs; 3117 PetscCall(ISGetBlockSize(iscol, &cbs)); 3118 PetscCall(ISAllGather(iscol, &iscol_local)); 3119 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3120 } 3121 3122 *isseq = iscol_local; 3123 PetscFunctionReturn(PETSC_SUCCESS); 3124 } 3125 3126 /* 3127 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3128 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3129 3130 Input Parameters: 3131 + mat - matrix 3132 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3133 i.e., mat->rstart <= isrow[i] < mat->rend 3134 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3135 i.e., mat->cstart <= iscol[i] < mat->cend 3136 3137 Output Parameters: 3138 + isrow_d - sequential row index set for retrieving mat->A 3139 . iscol_d - sequential column index set for retrieving mat->A 3140 . 
iscol_o - sequential column index set for retrieving mat->B 3141 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3142 */ 3143 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3144 { 3145 Vec x, cmap; 3146 const PetscInt *is_idx; 3147 PetscScalar *xarray, *cmaparray; 3148 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3150 Mat B = a->B; 3151 Vec lvec = a->lvec, lcmap; 3152 PetscInt i, cstart, cend, Bn = B->cmap->N; 3153 MPI_Comm comm; 3154 VecScatter Mvctx = a->Mvctx; 3155 3156 PetscFunctionBegin; 3157 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3158 PetscCall(ISGetLocalSize(iscol, &ncols)); 3159 3160 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3161 PetscCall(MatCreateVecs(mat, &x, NULL)); 3162 PetscCall(VecSet(x, -1.0)); 3163 PetscCall(VecDuplicate(x, &cmap)); 3164 PetscCall(VecSet(cmap, -1.0)); 3165 3166 /* Get start indices */ 3167 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3168 isstart -= ncols; 3169 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3170 3171 PetscCall(ISGetIndices(iscol, &is_idx)); 3172 PetscCall(VecGetArray(x, &xarray)); 3173 PetscCall(VecGetArray(cmap, &cmaparray)); 3174 PetscCall(PetscMalloc1(ncols, &idx)); 3175 for (i = 0; i < ncols; i++) { 3176 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3177 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3178 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3179 } 3180 PetscCall(VecRestoreArray(x, &xarray)); 3181 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3182 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3183 3184 /* Get iscol_d */ 3185 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3186 PetscCall(ISGetBlockSize(iscol, &i)); 3187 PetscCall(ISSetBlockSize(*iscol_d, i)); 3188 3189 /* Get isrow_d */ 3190 PetscCall(ISGetLocalSize(isrow, &m)); 3191 rstart = mat->rmap->rstart; 3192 PetscCall(PetscMalloc1(m, &idx)); 3193 PetscCall(ISGetIndices(isrow, &is_idx)); 3194 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3195 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3196 3197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3198 PetscCall(ISGetBlockSize(isrow, &i)); 3199 PetscCall(ISSetBlockSize(*isrow_d, i)); 3200 3201 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3202 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3203 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3204 3205 PetscCall(VecDuplicate(lvec, &lcmap)); 3206 3207 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3208 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3209 3210 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3211 /* off-process column indices */ 3212 count = 0; 3213 PetscCall(PetscMalloc1(Bn, &idx)); 3214 PetscCall(PetscMalloc1(Bn, &cmap1)); 3215 3216 PetscCall(VecGetArray(lvec, &xarray)); 3217 PetscCall(VecGetArray(lcmap, &cmaparray)); 3218 for (i = 0; i < Bn; i++) { 3219 if (PetscRealPart(xarray[i]) > -1.0) { 3220 idx[count] = i; /* local column index in off-diagonal part B */ 3221 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3222 count++; 
3223 } 3224 } 3225 PetscCall(VecRestoreArray(lvec, &xarray)); 3226 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3227 3228 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3229 /* cannot ensure iscol_o has same blocksize as iscol! */ 3230 3231 PetscCall(PetscFree(idx)); 3232 *garray = cmap1; 3233 3234 PetscCall(VecDestroy(&x)); 3235 PetscCall(VecDestroy(&cmap)); 3236 PetscCall(VecDestroy(&lcmap)); 3237 PetscFunctionReturn(PETSC_SUCCESS); 3238 } 3239 3240 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3241 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3242 { 3243 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3244 Mat M = NULL; 3245 MPI_Comm comm; 3246 IS iscol_d, isrow_d, iscol_o; 3247 Mat Asub = NULL, Bsub = NULL; 3248 PetscInt n; 3249 3250 PetscFunctionBegin; 3251 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3252 3253 if (call == MAT_REUSE_MATRIX) { 3254 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3255 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3256 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3257 3258 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3259 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3260 3261 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3262 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3263 3264 /* Update diagonal and off-diagonal portions of submat */ 3265 asub = (Mat_MPIAIJ *)(*submat)->data; 3266 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3267 PetscCall(ISGetLocalSize(iscol_o, &n)); 3268 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3269 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3270 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3271 3272 } else { /* call == MAT_INITIAL_MATRIX) */ 3273 const PetscInt *garray; 3274 PetscInt BsubN; 3275 3276 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3277 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3278 3279 /* Create local submatrices Asub and Bsub */ 3280 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3281 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3282 3283 /* Create submatrix M */ 3284 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3285 3286 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3287 asub = (Mat_MPIAIJ *)M->data; 3288 3289 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3290 n = asub->B->cmap->N; 3291 if (BsubN > n) { 3292 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3293 const PetscInt *idx; 3294 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3295 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3296 3297 PetscCall(PetscMalloc1(n, &idx_new)); 3298 j = 0; 3299 PetscCall(ISGetIndices(iscol_o, &idx)); 3300 for (i = 0; i < n; i++) { 3301 if (j >= BsubN) break; 3302 while (subgarray[i] > garray[j]) j++; 3303 3304 if (subgarray[i] == garray[j]) { 3305 idx_new[i] = idx[j++]; 3306 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3307 } 3308 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3309 3310 PetscCall(ISDestroy(&iscol_o)); 3311 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3312 3313 } else if (BsubN < n) { 3314 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3315 } 3316 3317 PetscCall(PetscFree(garray)); 3318 *submat = M; 3319 3320 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3321 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3322 PetscCall(ISDestroy(&isrow_d)); 3323 3324 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3325 PetscCall(ISDestroy(&iscol_d)); 3326 3327 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3328 PetscCall(ISDestroy(&iscol_o)); 3329 } 3330 PetscFunctionReturn(PETSC_SUCCESS); 3331 } 3332 3333 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3334 { 3335 IS iscol_local = NULL, isrow_d; 3336 PetscInt csize; 3337 PetscInt n, i, j, start, end; 3338 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3339 MPI_Comm comm; 3340 3341 PetscFunctionBegin; 3342 /* If isrow has same processor distribution as mat, 3343 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3344 if (call == MAT_REUSE_MATRIX) { 3345 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3346 if (isrow_d) { 3347 sameRowDist = PETSC_TRUE; 3348 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3349 } else { 3350 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3351 if (iscol_local) { 3352 sameRowDist = PETSC_TRUE; 3353 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3354 } 3355 } 3356 } else { 3357 /* Check if isrow has same processor distribution as mat */ 3358 sameDist[0] = PETSC_FALSE; 3359 
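/* an empty local piece of isrow trivially matches this process's distribution; otherwise its smallest and largest indices must lie within the local row ownership range */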
PetscCall(ISGetLocalSize(isrow, &n)); 3360 if (!n) { 3361 sameDist[0] = PETSC_TRUE; 3362 } else { 3363 PetscCall(ISGetMinMax(isrow, &i, &j)); 3364 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3365 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3366 } 3367 3368 /* Check if iscol has same processor distribution as mat */ 3369 sameDist[1] = PETSC_FALSE; 3370 PetscCall(ISGetLocalSize(iscol, &n)); 3371 if (!n) { 3372 sameDist[1] = PETSC_TRUE; 3373 } else { 3374 PetscCall(ISGetMinMax(iscol, &i, &j)); 3375 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3376 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3377 } 3378 3379 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3380 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3381 sameRowDist = tsameDist[0]; 3382 } 3383 3384 if (sameRowDist) { 3385 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3386 /* isrow and iscol have same processor distribution as mat */ 3387 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3388 PetscFunctionReturn(PETSC_SUCCESS); 3389 } else { /* sameRowDist */ 3390 /* isrow has same processor distribution as mat */ 3391 if (call == MAT_INITIAL_MATRIX) { 3392 PetscBool sorted; 3393 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3394 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3395 PetscCall(ISGetSize(iscol, &i)); 3396 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3397 3398 PetscCall(ISSorted(iscol_local, &sorted)); 3399 if (sorted) { 3400 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3401 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3402 PetscFunctionReturn(PETSC_SUCCESS); 3403 } 3404 } else { /* call == MAT_REUSE_MATRIX */ 3405 IS iscol_sub; 3406 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3407 if (iscol_sub) { 3408 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } 3411 } 3412 } 3413 } 3414 3415 /* General case: iscol -> iscol_local which has global size of iscol */ 3416 if (call == MAT_REUSE_MATRIX) { 3417 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3418 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3419 } else { 3420 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3421 } 3422 3423 PetscCall(ISGetLocalSize(iscol, &csize)); 3424 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3425 3426 if (call == MAT_INITIAL_MATRIX) { 3427 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3428 PetscCall(ISDestroy(&iscol_local)); 3429 } 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 3433 /*@C 3434 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3435 and "off-diagonal" part of the matrix in CSR format. 3436 3437 Collective 3438 3439 Input Parameters: 3440 + comm - MPI communicator 3441 . A - "diagonal" portion of matrix 3442 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3443 - garray - global index of `B` columns 3444 3445 Output Parameter: 3446 . mat - the matrix, with input `A` as its local diagonal matrix 3447 3448 Level: advanced 3449 3450 Notes: 3451 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3452 3453 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3454 3455 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3456 @*/ 3457 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3458 { 3459 Mat_MPIAIJ *maij; 3460 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3461 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3462 const PetscScalar *oa; 3463 Mat Bnew; 3464 PetscInt m, n, N; 3465 MatType mpi_mat_type; 3466 3467 PetscFunctionBegin; 3468 PetscCall(MatCreate(comm, mat)); 3469 PetscCall(MatGetSize(A, &m, &n)); 3470 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3471 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3472 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3473 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3474 3475 /* Get global columns of mat */ 3476 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3477 3478 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3479 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
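For example, a MATSEQAIJ A should yield a MATMPIAIJ result, and a device sequential subtype
      (e.g. CUSPARSE, HIPSPARSE, or Kokkos) should yield the corresponding MPI subtype.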
*/ 3480 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3481 PetscCall(MatSetType(*mat, mpi_mat_type)); 3482 3483 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3484 maij = (Mat_MPIAIJ *)(*mat)->data; 3485 3486 (*mat)->preallocated = PETSC_TRUE; 3487 3488 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3489 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3490 3491 /* Set A as diagonal portion of *mat */ 3492 maij->A = A; 3493 3494 nz = oi[m]; 3495 for (i = 0; i < nz; i++) { 3496 col = oj[i]; 3497 oj[i] = garray[col]; 3498 } 3499 3500 /* Set Bnew as off-diagonal portion of *mat */ 3501 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3502 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3503 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3504 bnew = (Mat_SeqAIJ *)Bnew->data; 3505 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3506 maij->B = Bnew; 3507 3508 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3509 3510 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3511 b->free_a = PETSC_FALSE; 3512 b->free_ij = PETSC_FALSE; 3513 PetscCall(MatDestroy(&B)); 3514 3515 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3516 bnew->free_a = PETSC_TRUE; 3517 bnew->free_ij = PETSC_TRUE; 3518 3519 /* condense columns of maij->B */ 3520 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3521 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3522 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3523 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3524 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3525 PetscFunctionReturn(PETSC_SUCCESS); 3526 } 3527 3528 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3529 3530 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3531 { 3532 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3533 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3534 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3535 Mat M, Msub, B = a->B; 3536 MatScalar *aa; 3537 Mat_SeqAIJ *aij; 3538 PetscInt *garray = a->garray, *colsub, Ncols; 3539 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3540 IS iscol_sub, iscmap; 3541 const PetscInt *is_idx, *cmap; 3542 PetscBool allcolumns = PETSC_FALSE; 3543 MPI_Comm comm; 3544 3545 PetscFunctionBegin; 3546 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3547 if (call == MAT_REUSE_MATRIX) { 3548 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3549 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3550 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3551 3552 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3553 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3554 3555 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3556 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3557 3558 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, 
&iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3559 3560 } else { /* call == MAT_INITIAL_MATRIX) */ 3561 PetscBool flg; 3562 3563 PetscCall(ISGetLocalSize(iscol, &n)); 3564 PetscCall(ISGetSize(iscol, &Ncols)); 3565 3566 /* (1) iscol -> nonscalable iscol_local */ 3567 /* Check for special case: each processor gets entire matrix columns */ 3568 PetscCall(ISIdentity(iscol_local, &flg)); 3569 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3570 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3571 if (allcolumns) { 3572 iscol_sub = iscol_local; 3573 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3574 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3575 3576 } else { 3577 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3578 PetscInt *idx, *cmap1, k; 3579 PetscCall(PetscMalloc1(Ncols, &idx)); 3580 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3581 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3582 count = 0; 3583 k = 0; 3584 for (i = 0; i < Ncols; i++) { 3585 j = is_idx[i]; 3586 if (j >= cstart && j < cend) { 3587 /* diagonal part of mat */ 3588 idx[count] = j; 3589 cmap1[count++] = i; /* column index in submat */ 3590 } else if (Bn) { 3591 /* off-diagonal part of mat */ 3592 if (j == garray[k]) { 3593 idx[count] = j; 3594 cmap1[count++] = i; /* column index in submat */ 3595 } else if (j > garray[k]) { 3596 while (j > garray[k] && k < Bn - 1) k++; 3597 if (j == garray[k]) { 3598 idx[count] = j; 3599 cmap1[count++] = i; /* column index in submat */ 3600 } 3601 } 3602 } 3603 } 3604 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3605 3606 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3607 PetscCall(ISGetBlockSize(iscol, &cbs)); 3608 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3609 3610 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3611 } 3612 3613 /* (3) Create sequential Msub */ 3614 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3615 } 3616 3617 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3618 aij = (Mat_SeqAIJ *)(Msub)->data; 3619 ii = aij->i; 3620 PetscCall(ISGetIndices(iscmap, &cmap)); 3621 3622 /* 3623 m - number of local rows 3624 Ncols - number of columns (same on all processors) 3625 rstart - first row in new global matrix generated 3626 */ 3627 PetscCall(MatGetSize(Msub, &m, NULL)); 3628 3629 if (call == MAT_INITIAL_MATRIX) { 3630 /* (4) Create parallel newmat */ 3631 PetscMPIInt rank, size; 3632 PetscInt csize; 3633 3634 PetscCallMPI(MPI_Comm_size(comm, &size)); 3635 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3636 3637 /* 3638 Determine the number of non-zeros in the diagonal and off-diagonal 3639 portions of the matrix in order to do correct preallocation 3640 */ 3641 3642 /* first get start and end of "diagonal" columns */ 3643 PetscCall(ISGetLocalSize(iscol, &csize)); 3644 if (csize == PETSC_DECIDE) { 3645 PetscCall(ISGetSize(isrow, &mglobal)); 3646 if (mglobal == Ncols) { /* square matrix */ 3647 nlocal = m; 3648 } else { 3649 nlocal = Ncols / size + ((Ncols % size) > rank); 3650 } 3651 } else { 3652 nlocal = csize; 3653 } 3654 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3655 rstart = rend - nlocal; 3656 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column 
sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3657 3658 /* next, compute all the lengths */ 3659 jj = aij->j; 3660 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3661 olens = dlens + m; 3662 for (i = 0; i < m; i++) { 3663 jend = ii[i + 1] - ii[i]; 3664 olen = 0; 3665 dlen = 0; 3666 for (j = 0; j < jend; j++) { 3667 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3668 else dlen++; 3669 jj++; 3670 } 3671 olens[i] = olen; 3672 dlens[i] = dlen; 3673 } 3674 3675 PetscCall(ISGetBlockSize(isrow, &bs)); 3676 PetscCall(ISGetBlockSize(iscol, &cbs)); 3677 3678 PetscCall(MatCreate(comm, &M)); 3679 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3680 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3681 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3682 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3683 PetscCall(PetscFree(dlens)); 3684 3685 } else { /* call == MAT_REUSE_MATRIX */ 3686 M = *newmat; 3687 PetscCall(MatGetLocalSize(M, &i, NULL)); 3688 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3689 PetscCall(MatZeroEntries(M)); 3690 /* 3691 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3692 rather than the slower MatSetValues(). 3693 */ 3694 M->was_assembled = PETSC_TRUE; 3695 M->assembled = PETSC_FALSE; 3696 } 3697 3698 /* (5) Set values of Msub to *newmat */ 3699 PetscCall(PetscMalloc1(count, &colsub)); 3700 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3701 3702 jj = aij->j; 3703 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3704 for (i = 0; i < m; i++) { 3705 row = rstart + i; 3706 nz = ii[i + 1] - ii[i]; 3707 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3708 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3709 jj += nz; 3710 aa += nz; 3711 } 3712 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3713 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3714 3715 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3716 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3717 3718 PetscCall(PetscFree(colsub)); 3719 3720 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3721 if (call == MAT_INITIAL_MATRIX) { 3722 *newmat = M; 3723 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3724 PetscCall(MatDestroy(&Msub)); 3725 3726 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3727 PetscCall(ISDestroy(&iscol_sub)); 3728 3729 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3730 PetscCall(ISDestroy(&iscmap)); 3731 3732 if (iscol_local) { 3733 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3734 PetscCall(ISDestroy(&iscol_local)); 3735 } 3736 } 3737 PetscFunctionReturn(PETSC_SUCCESS); 3738 } 3739 3740 /* 3741 Not great since it makes two copies of the submatrix, first an SeqAIJ 3742 in local and then by concatenating the local matrices the end result. 3743 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3744 3745 This requires a sequential iscol with all indices. 
3746 */ 3747 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3748 { 3749 PetscMPIInt rank, size; 3750 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3751 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3752 Mat M, Mreuse; 3753 MatScalar *aa, *vwork; 3754 MPI_Comm comm; 3755 Mat_SeqAIJ *aij; 3756 PetscBool colflag, allcolumns = PETSC_FALSE; 3757 3758 PetscFunctionBegin; 3759 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3760 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3761 PetscCallMPI(MPI_Comm_size(comm, &size)); 3762 3763 /* Check for special case: each processor gets entire matrix columns */ 3764 PetscCall(ISIdentity(iscol, &colflag)); 3765 PetscCall(ISGetLocalSize(iscol, &n)); 3766 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3767 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3768 3769 if (call == MAT_REUSE_MATRIX) { 3770 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3771 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3772 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3773 } else { 3774 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3775 } 3776 3777 /* 3778 m - number of local rows 3779 n - number of columns (same on all processors) 3780 rstart - first row in new global matrix generated 3781 */ 3782 PetscCall(MatGetSize(Mreuse, &m, &n)); 3783 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3784 if (call == MAT_INITIAL_MATRIX) { 3785 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3786 ii = aij->i; 3787 jj = aij->j; 3788 3789 /* 3790 Determine the number of non-zeros in the diagonal and off-diagonal 3791 portions of the matrix in order to do correct preallocation 3792 */ 3793 3794 /* first get start and end of "diagonal" columns */ 3795 if (csize == PETSC_DECIDE) { 3796 PetscCall(ISGetSize(isrow, &mglobal)); 3797 if (mglobal == n) { /* square matrix */ 3798 nlocal = m; 3799 } else { 3800 nlocal = n / size + ((n % size) > rank); 3801 } 3802 } else { 3803 nlocal = csize; 3804 } 3805 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3806 rstart = rend - nlocal; 3807 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3808 3809 /* next, compute all the lengths */ 3810 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3811 olens = dlens + m; 3812 for (i = 0; i < m; i++) { 3813 jend = ii[i + 1] - ii[i]; 3814 olen = 0; 3815 dlen = 0; 3816 for (j = 0; j < jend; j++) { 3817 if (*jj < rstart || *jj >= rend) olen++; 3818 else dlen++; 3819 jj++; 3820 } 3821 olens[i] = olen; 3822 dlens[i] = dlen; 3823 } 3824 PetscCall(MatCreate(comm, &M)); 3825 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3826 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3827 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3828 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3829 PetscCall(PetscFree(dlens)); 3830 } else { 3831 PetscInt ml, nl; 3832 3833 M = *newmat; 3834 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3835 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3836 PetscCall(MatZeroEntries(M)); 3837 /* 3838 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3839 rather than the slower MatSetValues(). 3840 */ 3841 M->was_assembled = PETSC_TRUE; 3842 M->assembled = PETSC_FALSE; 3843 } 3844 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3845 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3846 ii = aij->i; 3847 jj = aij->j; 3848 3849 /* trigger copy to CPU if needed */ 3850 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3851 for (i = 0; i < m; i++) { 3852 row = rstart + i; 3853 nz = ii[i + 1] - ii[i]; 3854 cwork = jj; 3855 jj += nz; 3856 vwork = aa; 3857 aa += nz; 3858 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3859 } 3860 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3861 3862 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3863 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3864 *newmat = M; 3865 3866 /* save submatrix used in processor for next request */ 3867 if (call == MAT_INITIAL_MATRIX) { 3868 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3869 PetscCall(MatDestroy(&Mreuse)); 3870 } 3871 PetscFunctionReturn(PETSC_SUCCESS); 3872 } 3873 3874 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3875 { 3876 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3877 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3878 const PetscInt *JJ; 3879 PetscBool nooffprocentries; 3880 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3881 3882 PetscFunctionBegin; 3883 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3884 3885 PetscCall(PetscLayoutSetUp(B->rmap)); 3886 PetscCall(PetscLayoutSetUp(B->cmap)); 3887 m = B->rmap->n; 3888 cstart = B->cmap->rstart; 3889 cend = B->cmap->rend; 3890 rstart = B->rmap->rstart; 3891 3892 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3893 3894 if (PetscDefined(USE_DEBUG)) { 3895 for (i = 0; i < m; i++) { 3896 nnz = Ii[i + 1] - Ii[i]; 3897 JJ = J + Ii[i]; 3898 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3899 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3900 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3901 } 3902 } 3903 3904 for (i = 0; i < m; i++) { 3905 nnz = Ii[i + 1] - Ii[i]; 3906 JJ = J + Ii[i]; 3907 nnz_max = PetscMax(nnz_max, nnz); 3908 d = 0; 3909 for (j = 0; j < nnz; j++) { 3910 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3911 } 3912 d_nnz[i] = d; 3913 o_nnz[i] = nnz - d; 3914 } 3915 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3916 PetscCall(PetscFree2(d_nnz, o_nnz)); 3917 3918 for (i = 0; i < m; i++) { 3919 ii = i + rstart; 3920 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3921 } 3922 nooffprocentries = B->nooffprocentries; 3923 B->nooffprocentries = PETSC_TRUE; 3924 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3925 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3926 B->nooffprocentries = nooffprocentries; 3927 3928 /* count number of entries below block diagonal */ 3929 PetscCall(PetscFree(Aij->ld)); 3930 PetscCall(PetscCalloc1(m, &ld)); 3931 Aij->ld = ld; 3932 for (i = 0; i < m; i++) { 3933 nnz = Ii[i + 1] - Ii[i]; 3934 j = 0; 3935 while (j < nnz && J[j] < cstart) j++; 3936 ld[i] = j; 3937 J += nnz; 3938 } 3939 3940 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3941 PetscFunctionReturn(PETSC_SUCCESS); 3942 } 3943 3944 /*@ 3945 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3946 (the default parallel PETSc format). 3947 3948 Collective 3949 3950 Input Parameters: 3951 + B - the matrix 3952 . i - the indices into j for the start of each local row (starts with zero) 3953 . j - the column indices for each local row (starts with zero) 3954 - v - optional values in the matrix 3955 3956 Level: developer 3957 3958 Notes: 3959 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3960 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3961 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3962 3963 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3964 3965 The format which is used for the sparse matrix input, is equivalent to a 3966 row-major ordering.. i.e for the following matrix, the input data expected is 3967 as shown 3968 3969 .vb 3970 1 0 0 3971 2 0 3 P0 3972 ------- 3973 4 5 6 P1 3974 3975 Process0 [P0] rows_owned=[0,1] 3976 i = {0,1,3} [size = nrow+1 = 2+1] 3977 j = {0,0,2} [size = 3] 3978 v = {1,2,3} [size = 3] 3979 3980 Process1 [P1] rows_owned=[2] 3981 i = {0,3} [size = nrow+1 = 1+1] 3982 j = {0,1,2} [size = 3] 3983 v = {4,5,6} [size = 3] 3984 .ve 3985 3986 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3987 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3988 @*/ 3989 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3990 { 3991 PetscFunctionBegin; 3992 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 3993 PetscFunctionReturn(PETSC_SUCCESS); 3994 } 3995 3996 /*@C 3997 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 3998 (the default parallel PETSc format). For good matrix assembly performance 3999 the user should preallocate the matrix storage by setting the parameters 4000 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4001 4002 Collective 4003 4004 Input Parameters: 4005 + B - the matrix 4006 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4007 (same value is used for all local rows) 4008 . d_nnz - array containing the number of nonzeros in the various rows of the 4009 DIAGONAL portion of the local submatrix (possibly different for each row) 4010 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 
4011    The size of this array is equal to the number of local rows, i.e., `m`.
4012    For matrices that will be factored, you must leave room for (and set)
4013    the diagonal entry even if it is zero.
4014 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4015    submatrix (same value is used for all local rows).
4016 -  o_nnz - array containing the number of nonzeros in the various rows of the
4017    OFF-DIAGONAL portion of the local submatrix (possibly different for
4018    each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4019    structure. The size of this array is equal to the number
4020    of local rows, i.e., `m`.
4021
4022    Usage:
4023    Consider the following 8x8 matrix with 34 non-zero values, that is
4024    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4025    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4026    as follows
4027
4028 .vb
4029             1  2  0  |  0  3  0  |  0  4
4030     Proc0   0  5  6  |  7  0  0  |  8  0
4031             9  0 10  | 11  0  0  | 12  0
4032     -------------------------------------
4033            13  0 14  | 15 16 17  |  0  0
4034     Proc1   0 18  0  | 19 20 21  |  0  0
4035             0  0  0  | 22 23  0  | 24  0
4036     -------------------------------------
4037     Proc2  25 26 27  |  0  0 28  | 29  0
4038            30  0  0  | 31 32 33  |  0 34
4039 .ve
4040
4041    This can be represented as a collection of submatrices as
4042 .vb
4043       A B C
4044       D E F
4045       G H I
4046 .ve
4047
4048    The submatrices A,B,C are owned by proc0, D,E,F are
4049    owned by proc1, and G,H,I are owned by proc2.
4050
4051    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4052    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4053    The 'M','N' parameters are 8,8, and have the same values on all procs.
4054
4055    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4056    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4057    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4058    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4059    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4060    matrix, and [DF] as another `MATSEQAIJ` matrix.
4061
4062    When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4063    allocated for every row of the local diagonal submatrix, and `o_nz`
4064    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4065    One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
4066    row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4067    In this case, the values of `d_nz`, `o_nz` are
4068 .vb
4069      proc0  d_nz = 2, o_nz = 2
4070      proc1  d_nz = 3, o_nz = 2
4071      proc2  d_nz = 1, o_nz = 4
4072 .ve
4073    We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4074    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4075    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4076    34 values.
4077
4078    When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4079    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4080    In the above case the values for `d_nnz`, `o_nnz` are
4081 .vb
4082      proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
4083      proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
4084      proc2  d_nnz = [1,1]   and o_nnz = [4,4]
4085 .ve
4086    Here the space allocated is the sum of all the above values, i.e., 34, and
4087    hence the preallocation is exact.
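   A minimal sketch of the corresponding calls on proc0 of the example above (the communicator and
   variable names are illustrative and error checking is omitted):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2, 2, 2}, o_nnz[3] = {2, 2, 2};  /* per-row counts on proc0 */

     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, 3, 3, 8, 8);                           /* m, n, M, N on proc0 */
     MatSetType(A, MATMPIAIJ);
     MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve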
4088
4089    Level: intermediate
4090
4091    Notes:
4092    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4093
4094    The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4095    storage. The stored row and column indices begin with zero.
4096    See [Sparse Matrices](sec_matsparse) for details.
4097
4098    The parallel matrix is partitioned such that the first m0 rows belong to
4099    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4100    to process 2, etc., where m0,m1,m2,... are the input parameter `m`.
4101
4102    The DIAGONAL portion of the local submatrix of a processor can be defined
4103    as the submatrix which is obtained by extracting the part corresponding to
4104    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4105    first row that belongs to the processor, r2 is the last row belonging to
4106    this processor, and c1-c2 is the range of indices of the local part of a
4107    vector suitable for applying the matrix to. This is an m x n matrix. In the
4108    common case of a square matrix, the row and column ranges are the same and
4109    the DIAGONAL part is also square. The remaining portion of the local
4110    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4111
4112    If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4113
4114    You can call `MatGetInfo()` to get information on how effective the preallocation was;
4115    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4116    You can also run with the option `-info` and look for messages with the string
4117    malloc in them to see if additional memory allocation was needed.
4118
4119 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4120           `MatGetInfo()`, `PetscSplitOwnership()`
4121 @*/
4122 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4123 {
4124   PetscFunctionBegin;
4125   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4126   PetscValidType(B, 1);
4127   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4128   PetscFunctionReturn(PETSC_SUCCESS);
4129 }
4130
4131 /*@
4132    MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4133    CSR format.
4134
4135    Collective
4136
4137    Input Parameters:
4138 +  comm - MPI communicator
4139 .  m - number of local rows (cannot be `PETSC_DECIDE`)
4140 .  n - This value should be the same as the local size used in creating the
4141    x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4142    calculated if N is given). For square matrices n is almost always m.
4143 .  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
4144 .  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
4145 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4146 .  j - column indices
4147 -  a - optional matrix values
4148
4149    Output Parameter:
4150 .  mat - the matrix
4151
4152    Level: intermediate
4153
4154    Notes:
4155    The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4156    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4157    called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4158
4159    The `i` and `j` indices are 0 based, and the `i` entries are offsets into the local `j` array.
4160
4161    The format used for the sparse matrix input is equivalent to a
4162    row-major ordering, i.e., for the following matrix, the expected input data is
4163    as shown.
4164
4165    Once you have created the matrix you can update its numerical values using `MatUpdateMPIAIJWithArrays()`.
4166 .vb
4167         1 0 0
4168         2 0 3     P0
4169        -------
4170         4 5 6     P1
4171
4172      Process0 [P0] rows_owned=[0,1]
4173         i =  {0,1,3}  [size = nrow+1 = 2+1]
4174         j =  {0,0,2}  [size = 3]
4175         v =  {1,2,3}  [size = 3]
4176
4177      Process1 [P1] rows_owned=[2]
4178         i =  {0,3}    [size = nrow+1 = 1+1]
4179         j =  {0,1,2}  [size = 3]
4180         v =  {4,5,6}  [size = 3]
4181 .ve
4182
4183 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4184           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4185 @*/
4186 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4187 {
4188   PetscFunctionBegin;
4189   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4190   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4191   PetscCall(MatCreate(comm, mat));
4192   PetscCall(MatSetSizes(*mat, m, n, M, N));
4193   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4194   PetscCall(MatSetType(*mat, MATMPIAIJ));
4195   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4196   PetscFunctionReturn(PETSC_SUCCESS);
4197 }
4198
4199 /*@
4200    MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4201    CSR format. Only the numerical values are updated; the other arrays must be identical to those passed
4202    to `MatCreateMPIAIJWithArrays()`.
4203
4204    Deprecated: Use `MatUpdateMPIAIJWithArray()`
4205
4206    Collective
4207
4208    Input Parameters:
4209 +  mat - the matrix
4210 .  m - number of local rows (cannot be `PETSC_DECIDE`)
4211 .  n - This value should be the same as the local size used in creating the
4212    x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4213    calculated if N is given). For square matrices n is almost always m.
4214 .  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
4215 .  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
4216 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4217 .
J - column indices 4218 - v - matrix values 4219 4220 Level: deprecated 4221 4222 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4223 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4224 @*/ 4225 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4226 { 4227 PetscInt nnz, i; 4228 PetscBool nooffprocentries; 4229 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4230 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4231 PetscScalar *ad, *ao; 4232 PetscInt ldi, Iii, md; 4233 const PetscInt *Adi = Ad->i; 4234 PetscInt *ld = Aij->ld; 4235 4236 PetscFunctionBegin; 4237 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4238 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4239 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4240 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4241 4242 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4243 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4244 4245 for (i = 0; i < m; i++) { 4246 nnz = Ii[i + 1] - Ii[i]; 4247 Iii = Ii[i]; 4248 ldi = ld[i]; 4249 md = Adi[i + 1] - Adi[i]; 4250 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4251 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4252 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4253 ad += md; 4254 ao += nnz - md; 4255 } 4256 nooffprocentries = mat->nooffprocentries; 4257 mat->nooffprocentries = PETSC_TRUE; 4258 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4259 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4260 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4261 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4262 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4263 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4264 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4265 mat->nooffprocentries = nooffprocentries; 4266 PetscFunctionReturn(PETSC_SUCCESS); 4267 } 4268 4269 /*@ 4270 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4271 4272 Collective 4273 4274 Input Parameters: 4275 + mat - the matrix 4276 - v - matrix values, stored by row 4277 4278 Level: intermediate 4279 4280 Note: 4281 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4282 4283 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4284 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4285 @*/ 4286 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4287 { 4288 PetscInt nnz, i, m; 4289 PetscBool nooffprocentries; 4290 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4291 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4292 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4293 PetscScalar *ad, *ao; 4294 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4295 
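  /* ld[i], computed during preallocation, is the number of entries of row i whose global column lies
     before the diagonal block; each row of v is laid out as
     [leading off-diagonal | diagonal block | trailing off-diagonal] */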
PetscInt ldi, Iii, md; 4296 PetscInt *ld = Aij->ld; 4297 4298 PetscFunctionBegin; 4299 m = mat->rmap->n; 4300 4301 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4302 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4303 Iii = 0; 4304 for (i = 0; i < m; i++) { 4305 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4306 ldi = ld[i]; 4307 md = Adi[i + 1] - Adi[i]; 4308 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4309 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4310 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4311 ad += md; 4312 ao += nnz - md; 4313 Iii += nnz; 4314 } 4315 nooffprocentries = mat->nooffprocentries; 4316 mat->nooffprocentries = PETSC_TRUE; 4317 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4318 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4319 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4320 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4321 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4322 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4323 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4324 mat->nooffprocentries = nooffprocentries; 4325 PetscFunctionReturn(PETSC_SUCCESS); 4326 } 4327 4328 /*@C 4329 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4330 (the default parallel PETSc format). For good matrix assembly performance 4331 the user should preallocate the matrix storage by setting the parameters 4332 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4333 4334 Collective 4335 4336 Input Parameters: 4337 + comm - MPI communicator 4338 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4339 This value should be the same as the local size used in creating the 4340 y vector for the matrix-vector product y = Ax. 4341 . n - This value should be the same as the local size used in creating the 4342 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4343 calculated if N is given) For square matrices n is almost always m. 4344 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4345 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4346 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4347 (same value is used for all local rows) 4348 . d_nnz - array containing the number of nonzeros in the various rows of the 4349 DIAGONAL portion of the local submatrix (possibly different for each row) 4350 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4351 The size of this array is equal to the number of local rows, i.e 'm'. 4352 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4353 submatrix (same value is used for all local rows). 4354 - o_nnz - array containing the number of nonzeros in the various rows of the 4355 OFF-DIAGONAL portion of the local submatrix (possibly different for 4356 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4357 structure. The size of this array is equal to the number 4358 of local rows, i.e 'm'. 4359 4360 Output Parameter: 4361 . A - the matrix 4362 4363 Options Database Keys: 4364 + -mat_no_inode - Do not use inodes 4365 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4366 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4367 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 
4368    Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4369
4370    Level: intermediate
4371
4372    Notes:
4373    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4374    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4375    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4376
4377    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4378
4379    The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4380    processors, while the `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4381    storage requirements for this matrix.
4382
4383    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4384    processor then it must be used on all processors that share the object for
4385    that argument.
4386
4387    The user MUST specify either the local or global matrix dimensions
4388    (possibly both).
4389
4390    The parallel matrix is partitioned across processors such that the
4391    first m0 rows belong to process 0, the next m1 rows belong to
4392    process 1, the next m2 rows belong to process 2, etc., where
4393    m0,m1,m2,... are the input parameter `m`, i.e., each processor stores
4394    values corresponding to an [m x N] submatrix.
4395
4396    The columns are logically partitioned with the n0 columns belonging
4397    to the 0th partition, the next n1 columns belonging to the next
4398    partition, etc., where n0,n1,n2,... are the input parameter `n`.
4399
4400    The DIAGONAL portion of the local submatrix on any given processor
4401    is the submatrix corresponding to the rows and columns m,n
4402    corresponding to the given processor, i.e., the diagonal matrix on
4403    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4404    etc. The remaining portion of the local submatrix [m x (N-n)]
4405    constitutes the OFF-DIAGONAL portion. The example below better
4406    illustrates this concept.
4407
4408    For a square global matrix we define each processor's diagonal portion
4409    to be its local rows and the corresponding columns (a square submatrix);
4410    each processor's off-diagonal portion encompasses the remainder of the
4411    local matrix (a rectangular submatrix).
4412
4413    If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4414
4415    When calling this routine with a single process communicator, a matrix of
4416    type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
4417    type of communicator, use the construction mechanism
4418 .vb
4419      MatCreate(..., &A);
4420      MatSetType(A, MATMPIAIJ);
4421      MatSetSizes(A, m, n, M, N);
4422      MatMPIAIJSetPreallocation(A, ...);
4423 .ve
4424
4425    By default, this format uses inodes (identical nodes) when possible.
4426    We search for consecutive rows with the same nonzero structure, thereby
4427    reusing matrix information to achieve increased efficiency.
4428
4429    Usage:
4430    Consider the following 8x8 matrix with 34 non-zero values, that is
4431    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4432    proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4433 as follows 4434 4435 .vb 4436 1 2 0 | 0 3 0 | 0 4 4437 Proc0 0 5 6 | 7 0 0 | 8 0 4438 9 0 10 | 11 0 0 | 12 0 4439 ------------------------------------- 4440 13 0 14 | 15 16 17 | 0 0 4441 Proc1 0 18 0 | 19 20 21 | 0 0 4442 0 0 0 | 22 23 0 | 24 0 4443 ------------------------------------- 4444 Proc2 25 26 27 | 0 0 28 | 29 0 4445 30 0 0 | 31 32 33 | 0 34 4446 .ve 4447 4448 This can be represented as a collection of submatrices as 4449 4450 .vb 4451 A B C 4452 D E F 4453 G H I 4454 .ve 4455 4456 Here the submatrices A,B,C are owned by proc0, D,E,F are 4457 owned by proc1, G,H,I are owned by proc2. 4458 4459 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4460 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4461 The 'M','N' parameters are 8,8, and have the same values on all procs. 4462 4463 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4464 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4465 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4466 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4467 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4468 matrix, and [DF] as another `MATSEQAIJ` matrix. 4469 4470 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4471 allocated for every row of the local diagonal submatrix, and `o_nz` 4472 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4473 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local 4474 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4475 In this case, the values of `d_nz`,`o_nz` are 4476 .vb 4477 proc0 d_nz = 2, o_nz = 2 4478 proc1 d_nz = 3, o_nz = 2 4479 proc2 d_nz = 1, o_nz = 4 4480 .ve 4481 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4482 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4483 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4484 34 values. 4485 4486 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4487 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4488 In the above case the values for `d_nnz`,`o_nnz` are 4489 .vb 4490 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4491 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4492 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4493 .ve 4494 Here the space allocated is the sum of all the above values, i.e. 34, and 4495 hence the pre-allocation is perfect.
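  As a sketch (error checking and assembly omitted), with the `d_nnz`/`o_nnz` values above proc1 could create its share of this matrix with exact preallocation, each process calling the routine with its own local sizes and arrays, via
.vb
     PetscInt d_nnz[3] = {3, 3, 2}, o_nnz[3] = {2, 1, 1};
     MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve
  followed by the usual `MatSetValues()` and `MatAssemblyBegin()`/`MatAssemblyEnd()` calls.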
4496 4497 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4498 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4499 @*/ 4500 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4501 { 4502 PetscMPIInt size; 4503 4504 PetscFunctionBegin; 4505 PetscCall(MatCreate(comm, A)); 4506 PetscCall(MatSetSizes(*A, m, n, M, N)); 4507 PetscCallMPI(MPI_Comm_size(comm, &size)); 4508 if (size > 1) { 4509 PetscCall(MatSetType(*A, MATMPIAIJ)); 4510 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4511 } else { 4512 PetscCall(MatSetType(*A, MATSEQAIJ)); 4513 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4514 } 4515 PetscFunctionReturn(PETSC_SUCCESS); 4516 } 4517 4518 /*MC 4519 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4520 4521 Synopsis: 4522 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4523 4524 Not Collective 4525 4526 Input Parameter: 4527 . A - the `MATMPIAIJ` matrix 4528 4529 Output Parameters: 4530 + Ad - the diagonal portion of the matrix 4531 . Ao - the off diagonal portion of the matrix 4532 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4533 - ierr - error code 4534 4535 Level: advanced 4536 4537 Note: 4538 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4539 4540 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4541 M*/ 4542 4543 /*MC 4544 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4545 4546 Synopsis: 4547 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4548 4549 Not Collective 4550 4551 Input Parameters: 4552 + A - the `MATMPIAIJ` matrix 4553 . Ad - the diagonal portion of the matrix 4554 . Ao - the off diagonal portion of the matrix 4555 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4556 - ierr - error code 4557 4558 Level: advanced 4559 4560 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4561 M*/ 4562 4563 /*@C 4564 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4565 4566 Not Collective 4567 4568 Input Parameter: 4569 . A - The `MATMPIAIJ` matrix 4570 4571 Output Parameters: 4572 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4573 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4574 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4575 4576 Level: intermediate 4577 4578 Note: 4579 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4580 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4581 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4582 local column numbers to global column numbers in the original matrix. 
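  For example (a sketch, with illustrative variable names), one can traverse the map from the off-diagonal block's local columns to global columns with
.vb
     Mat            Ad, Ao;
     const PetscInt *colmap;
     PetscInt       j, nco;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
     MatGetSize(Ao, NULL, &nco);
     for (j = 0; j < nco; j++) PetscPrintf(PETSC_COMM_SELF, "local column %" PetscInt_FMT " of Ao is global column %" PetscInt_FMT "\n", j, colmap[j]);
.ve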
4583 4584 Fortran Note: 4585 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4586 4587 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4588 @*/ 4589 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4590 { 4591 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4592 PetscBool flg; 4593 4594 PetscFunctionBegin; 4595 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4596 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4597 if (Ad) *Ad = a->A; 4598 if (Ao) *Ao = a->B; 4599 if (colmap) *colmap = a->garray; 4600 PetscFunctionReturn(PETSC_SUCCESS); 4601 } 4602 4603 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4604 { 4605 PetscInt m, N, i, rstart, nnz, Ii; 4606 PetscInt *indx; 4607 PetscScalar *values; 4608 MatType rootType; 4609 4610 PetscFunctionBegin; 4611 PetscCall(MatGetSize(inmat, &m, &N)); 4612 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4613 PetscInt *dnz, *onz, sum, bs, cbs; 4614 4615 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4616 /* Check sum(n) = N */ 4617 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4618 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4619 4620 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4621 rstart -= m; 4622 4623 MatPreallocateBegin(comm, m, n, dnz, onz); 4624 for (i = 0; i < m; i++) { 4625 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4626 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4627 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4628 } 4629 4630 PetscCall(MatCreate(comm, outmat)); 4631 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4632 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4633 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4634 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4635 PetscCall(MatSetType(*outmat, rootType)); 4636 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4637 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4638 MatPreallocateEnd(dnz, onz); 4639 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4640 } 4641 4642 /* numeric phase */ 4643 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4644 for (i = 0; i < m; i++) { 4645 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4646 Ii = i + rstart; 4647 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4648 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4649 } 4650 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4651 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4652 PetscFunctionReturn(PETSC_SUCCESS); 4653 } 4654 4655 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4656 { 4657 PetscMPIInt rank; 4658 PetscInt m, N, i, rstart, nnz; 4659 size_t len; 4660 const PetscInt *indx; 4661 PetscViewer out; 4662 char *name; 4663 Mat B; 4664 const PetscScalar *values; 4665 4666 PetscFunctionBegin; 4667 PetscCall(MatGetLocalSize(A, &m, NULL)); 4668 PetscCall(MatGetSize(A, NULL, &N)); 4669 /* Should this be the type of the 
diagonal block of A? */ 4670 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4671 PetscCall(MatSetSizes(B, m, N, m, N)); 4672 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4673 PetscCall(MatSetType(B, MATSEQAIJ)); 4674 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4675 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4676 for (i = 0; i < m; i++) { 4677 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4678 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4679 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4680 } 4681 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4682 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4683 4684 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4685 PetscCall(PetscStrlen(outfile, &len)); 4686 PetscCall(PetscMalloc1(len + 6, &name)); 4687 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4688 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4689 PetscCall(PetscFree(name)); 4690 PetscCall(MatView(B, out)); 4691 PetscCall(PetscViewerDestroy(&out)); 4692 PetscCall(MatDestroy(&B)); 4693 PetscFunctionReturn(PETSC_SUCCESS); 4694 } 4695 4696 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4697 { 4698 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4699 4700 PetscFunctionBegin; 4701 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4702 PetscCall(PetscFree(merge->id_r)); 4703 PetscCall(PetscFree(merge->len_s)); 4704 PetscCall(PetscFree(merge->len_r)); 4705 PetscCall(PetscFree(merge->bi)); 4706 PetscCall(PetscFree(merge->bj)); 4707 PetscCall(PetscFree(merge->buf_ri[0])); 4708 PetscCall(PetscFree(merge->buf_ri)); 4709 PetscCall(PetscFree(merge->buf_rj[0])); 4710 PetscCall(PetscFree(merge->buf_rj)); 4711 PetscCall(PetscFree(merge->coi)); 4712 PetscCall(PetscFree(merge->coj)); 4713 PetscCall(PetscFree(merge->owners_co)); 4714 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4715 PetscCall(PetscFree(merge)); 4716 PetscFunctionReturn(PETSC_SUCCESS); 4717 } 4718 4719 #include <../src/mat/utils/freespace.h> 4720 #include <petscbt.h> 4721 4722 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4723 { 4724 MPI_Comm comm; 4725 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4726 PetscMPIInt size, rank, taga, *len_s; 4727 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4728 PetscInt proc, m; 4729 PetscInt **buf_ri, **buf_rj; 4730 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4731 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4732 MPI_Request *s_waits, *r_waits; 4733 MPI_Status *status; 4734 const MatScalar *aa, *a_a; 4735 MatScalar **abuf_r, *ba_i; 4736 Mat_Merge_SeqsToMPI *merge; 4737 PetscContainer container; 4738 4739 PetscFunctionBegin; 4740 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4741 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4742 4743 PetscCallMPI(MPI_Comm_size(comm, &size)); 4744 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4745 4746 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4747 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4748 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4749 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4750 aa = a_a; 4751 4752 bi = merge->bi; 4753 bj = merge->bj; 4754 buf_ri = merge->buf_ri; 4755 buf_rj = merge->buf_rj; 4756 4757 PetscCall(PetscMalloc1(size, &status)); 
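  /* The symbolic phase (MatCreateMPIAIJSumSeqAIJSymbolic) stored the merged row layout and the per-rank message lengths in 'merge'; they are reused here so that only the numerical values need to be communicated */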
4758 owners = merge->rowmap->range; 4759 len_s = merge->len_s; 4760 4761 /* send and recv matrix values */ 4762 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4763 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4764 4765 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4766 for (proc = 0, k = 0; proc < size; proc++) { 4767 if (!len_s[proc]) continue; 4768 i = owners[proc]; 4769 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4770 k++; 4771 } 4772 4773 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4774 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4775 PetscCall(PetscFree(status)); 4776 4777 PetscCall(PetscFree(s_waits)); 4778 PetscCall(PetscFree(r_waits)); 4779 4780 /* insert mat values of mpimat */ 4781 PetscCall(PetscMalloc1(N, &ba_i)); 4782 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4783 4784 for (k = 0; k < merge->nrecv; k++) { 4785 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4786 nrows = *(buf_ri_k[k]); 4787 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4788 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4789 } 4790 4791 /* set values of ba */ 4792 m = merge->rowmap->n; 4793 for (i = 0; i < m; i++) { 4794 arow = owners[rank] + i; 4795 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4796 bnzi = bi[i + 1] - bi[i]; 4797 PetscCall(PetscArrayzero(ba_i, bnzi)); 4798 4799 /* add local non-zero vals of this proc's seqmat into ba */ 4800 anzi = ai[arow + 1] - ai[arow]; 4801 aj = a->j + ai[arow]; 4802 aa = a_a + ai[arow]; 4803 nextaj = 0; 4804 for (j = 0; nextaj < anzi; j++) { 4805 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4806 ba_i[j] += aa[nextaj++]; 4807 } 4808 } 4809 4810 /* add received vals into ba */ 4811 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4812 /* i-th row */ 4813 if (i == *nextrow[k]) { 4814 anzi = *(nextai[k] + 1) - *nextai[k]; 4815 aj = buf_rj[k] + *(nextai[k]); 4816 aa = abuf_r[k] + *(nextai[k]); 4817 nextaj = 0; 4818 for (j = 0; nextaj < anzi; j++) { 4819 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4820 ba_i[j] += aa[nextaj++]; 4821 } 4822 } 4823 nextrow[k]++; 4824 nextai[k]++; 4825 } 4826 } 4827 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4828 } 4829 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4830 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4831 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4832 4833 PetscCall(PetscFree(abuf_r[0])); 4834 PetscCall(PetscFree(abuf_r)); 4835 PetscCall(PetscFree(ba_i)); 4836 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4837 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4838 PetscFunctionReturn(PETSC_SUCCESS); 4839 } 4840 4841 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4842 { 4843 Mat B_mpi; 4844 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4845 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4846 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4847 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4848 PetscInt len, proc, *dnz, *onz, bs, cbs; 4849 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4850 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, 
**nextrow, **nextai; 4851 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4852 MPI_Status *status; 4853 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4854 PetscBT lnkbt; 4855 Mat_Merge_SeqsToMPI *merge; 4856 PetscContainer container; 4857 4858 PetscFunctionBegin; 4859 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4860 4861 /* make sure it is a PETSc comm */ 4862 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4863 PetscCallMPI(MPI_Comm_size(comm, &size)); 4864 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4865 4866 PetscCall(PetscNew(&merge)); 4867 PetscCall(PetscMalloc1(size, &status)); 4868 4869 /* determine row ownership */ 4870 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4871 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4872 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4873 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4874 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4875 PetscCall(PetscMalloc1(size, &len_si)); 4876 PetscCall(PetscMalloc1(size, &merge->len_s)); 4877 4878 m = merge->rowmap->n; 4879 owners = merge->rowmap->range; 4880 4881 /* determine the number of messages to send, their lengths */ 4882 len_s = merge->len_s; 4883 4884 len = 0; /* length of buf_si[] */ 4885 merge->nsend = 0; 4886 for (proc = 0; proc < size; proc++) { 4887 len_si[proc] = 0; 4888 if (proc == rank) { 4889 len_s[proc] = 0; 4890 } else { 4891 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4892 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4893 } 4894 if (len_s[proc]) { 4895 merge->nsend++; 4896 nrows = 0; 4897 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4898 if (ai[i + 1] > ai[i]) nrows++; 4899 } 4900 len_si[proc] = 2 * (nrows + 1); 4901 len += len_si[proc]; 4902 } 4903 } 4904 4905 /* determine the number and length of messages to receive for ij-structure */ 4906 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4907 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4908 4909 /* post the Irecv of j-structure */ 4910 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4911 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4912 4913 /* post the Isend of j-structure */ 4914 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4915 4916 for (proc = 0, k = 0; proc < size; proc++) { 4917 if (!len_s[proc]) continue; 4918 i = owners[proc]; 4919 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4920 k++; 4921 } 4922 4923 /* receives and sends of j-structure are complete */ 4924 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4925 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4926 4927 /* send and recv i-structure */ 4928 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4929 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4930 4931 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4932 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4933 for (proc = 0, k = 0; proc < size; proc++) { 4934 if (!len_s[proc]) continue; 4935 /* form outgoing message for i-structure: 4936 buf_si[0]: nrows to be sent 4937 [1:nrows]: row index (global) 4938 [nrows+1:2*nrows+1]: i-structure index 4939 */ 4940 nrows = len_si[proc] / 2 - 1; 4941 buf_si_i = buf_si + nrows + 1; 4942 buf_si[0] = nrows; 
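    /* illustrative layout: if destination-local rows 2 and 5 (with 3 and 4 nonzeros) are contributed, then buf_si = {2, 2, 5, 0, 3, 7}: the row count, the nonempty row indices, then the running nonzero offsets */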
4943 buf_si_i[0] = 0; 4944 nrows = 0; 4945 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4946 anzi = ai[i + 1] - ai[i]; 4947 if (anzi) { 4948 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4949 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4950 nrows++; 4951 } 4952 } 4953 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4954 k++; 4955 buf_si += len_si[proc]; 4956 } 4957 4958 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4959 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4960 4961 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4962 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4963 4964 PetscCall(PetscFree(len_si)); 4965 PetscCall(PetscFree(len_ri)); 4966 PetscCall(PetscFree(rj_waits)); 4967 PetscCall(PetscFree2(si_waits, sj_waits)); 4968 PetscCall(PetscFree(ri_waits)); 4969 PetscCall(PetscFree(buf_s)); 4970 PetscCall(PetscFree(status)); 4971 4972 /* compute a local seq matrix in each processor */ 4973 /* allocate bi array and free space for accumulating nonzero column info */ 4974 PetscCall(PetscMalloc1(m + 1, &bi)); 4975 bi[0] = 0; 4976 4977 /* create and initialize a linked list */ 4978 nlnk = N + 1; 4979 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4980 4981 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4982 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4983 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4984 4985 current_space = free_space; 4986 4987 /* determine symbolic info for each local row */ 4988 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4989 4990 for (k = 0; k < merge->nrecv; k++) { 4991 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4992 nrows = *buf_ri_k[k]; 4993 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4994 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4995 } 4996 4997 MatPreallocateBegin(comm, m, n, dnz, onz); 4998 len = 0; 4999 for (i = 0; i < m; i++) { 5000 bnzi = 0; 5001 /* add local non-zero cols of this proc's seqmat into lnk */ 5002 arow = owners[rank] + i; 5003 anzi = ai[arow + 1] - ai[arow]; 5004 aj = a->j + ai[arow]; 5005 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5006 bnzi += nlnk; 5007 /* add received col data into lnk */ 5008 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5009 if (i == *nextrow[k]) { /* i-th row */ 5010 anzi = *(nextai[k] + 1) - *nextai[k]; 5011 aj = buf_rj[k] + *nextai[k]; 5012 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5013 bnzi += nlnk; 5014 nextrow[k]++; 5015 nextai[k]++; 5016 } 5017 } 5018 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5019 5020 /* if free space is not available, make more free space */ 5021 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5022 /* copy data into free space, then initialize lnk */ 5023 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5024 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5025 5026 current_space->array += bnzi; 5027 current_space->local_used += bnzi; 5028 current_space->local_remaining -= bnzi; 5029 
5030 bi[i + 1] = bi[i] + bnzi; 5031 } 5032 5033 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5034 5035 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5036 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5037 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5038 5039 /* create symbolic parallel matrix B_mpi */ 5040 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5041 PetscCall(MatCreate(comm, &B_mpi)); 5042 if (n == PETSC_DECIDE) { 5043 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5044 } else { 5045 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5046 } 5047 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5048 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5049 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5050 MatPreallocateEnd(dnz, onz); 5051 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5052 5053 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5054 B_mpi->assembled = PETSC_FALSE; 5055 merge->bi = bi; 5056 merge->bj = bj; 5057 merge->buf_ri = buf_ri; 5058 merge->buf_rj = buf_rj; 5059 merge->coi = NULL; 5060 merge->coj = NULL; 5061 merge->owners_co = NULL; 5062 5063 PetscCall(PetscCommDestroy(&comm)); 5064 5065 /* attach the supporting struct to B_mpi for reuse */ 5066 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5067 PetscCall(PetscContainerSetPointer(container, merge)); 5068 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5069 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5070 PetscCall(PetscContainerDestroy(&container)); 5071 *mpimat = B_mpi; 5072 5073 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5074 PetscFunctionReturn(PETSC_SUCCESS); 5075 } 5076 5077 /*@C 5078 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5079 matrices from each processor 5080 5081 Collective 5082 5083 Input Parameters: 5084 + comm - the communicators the parallel matrix will live on 5085 . seqmat - the input sequential matrices 5086 . m - number of local rows (or `PETSC_DECIDE`) 5087 . n - number of local columns (or `PETSC_DECIDE`) 5088 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5089 5090 Output Parameter: 5091 . mpimat - the parallel matrix generated 5092 5093 Level: advanced 5094 5095 Note: 5096 The dimensions of the sequential matrix in each processor MUST be the same. 5097 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5098 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
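  Example Usage:
  A sketch of the typical call sequence, where each process has assembled its own sequential matrix seqmat of identical size, is
.vb
     MatCreateMPIAIJSumSeqAIJ(comm, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C);
.ve
  and, after the numerical values (but not the nonzero pattern) of seqmat have changed,
.vb
     MatCreateMPIAIJSumSeqAIJ(comm, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C);
.ve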
5099 5100 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5101 @*/ 5102 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5103 { 5104 PetscMPIInt size; 5105 5106 PetscFunctionBegin; 5107 PetscCallMPI(MPI_Comm_size(comm, &size)); 5108 if (size == 1) { 5109 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5110 if (scall == MAT_INITIAL_MATRIX) { 5111 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5112 } else { 5113 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5114 } 5115 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5116 PetscFunctionReturn(PETSC_SUCCESS); 5117 } 5118 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5119 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5120 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5121 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5122 PetscFunctionReturn(PETSC_SUCCESS); 5123 } 5124 5125 /*@ 5126 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with 5127 mlocal rows and n columns. Where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained 5128 with `MatGetSize()` 5129 5130 Not Collective 5131 5132 Input Parameter: 5133 . A - the matrix 5134 5135 Output Parameter: 5136 . A_loc - the local sequential matrix generated 5137 5138 Level: developer 5139 5140 Notes: 5141 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5142 5143 Destroy the matrix with `MatDestroy()` 5144 5145 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5146 @*/ 5147 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5148 { 5149 PetscBool mpi; 5150 5151 PetscFunctionBegin; 5152 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5153 if (mpi) { 5154 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5155 } else { 5156 *A_loc = A; 5157 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5158 } 5159 PetscFunctionReturn(PETSC_SUCCESS); 5160 } 5161 5162 /*@ 5163 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5164 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5165 with `MatGetSize()` 5166 5167 Not Collective 5168 5169 Input Parameters: 5170 + A - the matrix 5171 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5172 5173 Output Parameter: 5174 . A_loc - the local sequential matrix generated 5175 5176 Level: developer 5177 5178 Notes: 5179 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5180 5181 When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`. 5182 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called. 5183 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5184 modify the values of the returned `A_loc`. 
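  Example Usage:
  A sketch of the reuse pattern described above is
.vb
     MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
.ve
  and, after the values of A change while its nonzero pattern stays fixed,
.vb
     MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
     ...
     MatDestroy(&A_loc);
.ve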
5185 5186 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5187 @*/ 5188 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5189 { 5190 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5191 Mat_SeqAIJ *mat, *a, *b; 5192 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5193 const PetscScalar *aa, *ba, *aav, *bav; 5194 PetscScalar *ca, *cam; 5195 PetscMPIInt size; 5196 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5197 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5198 PetscBool match; 5199 5200 PetscFunctionBegin; 5201 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5202 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5203 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5204 if (size == 1) { 5205 if (scall == MAT_INITIAL_MATRIX) { 5206 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5207 *A_loc = mpimat->A; 5208 } else if (scall == MAT_REUSE_MATRIX) { 5209 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5210 } 5211 PetscFunctionReturn(PETSC_SUCCESS); 5212 } 5213 5214 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5215 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5216 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5217 ai = a->i; 5218 aj = a->j; 5219 bi = b->i; 5220 bj = b->j; 5221 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5222 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5223 aa = aav; 5224 ba = bav; 5225 if (scall == MAT_INITIAL_MATRIX) { 5226 PetscCall(PetscMalloc1(1 + am, &ci)); 5227 ci[0] = 0; 5228 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5229 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5230 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5231 k = 0; 5232 for (i = 0; i < am; i++) { 5233 ncols_o = bi[i + 1] - bi[i]; 5234 ncols_d = ai[i + 1] - ai[i]; 5235 /* off-diagonal portion of A */ 5236 for (jo = 0; jo < ncols_o; jo++) { 5237 col = cmap[*bj]; 5238 if (col >= cstart) break; 5239 cj[k] = col; 5240 bj++; 5241 ca[k++] = *ba++; 5242 } 5243 /* diagonal portion of A */ 5244 for (j = 0; j < ncols_d; j++) { 5245 cj[k] = cstart + *aj++; 5246 ca[k++] = *aa++; 5247 } 5248 /* off-diagonal portion of A */ 5249 for (j = jo; j < ncols_o; j++) { 5250 cj[k] = cmap[*bj++]; 5251 ca[k++] = *ba++; 5252 } 5253 } 5254 /* put together the new matrix */ 5255 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5256 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5257 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5258 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5259 mat->free_a = PETSC_TRUE; 5260 mat->free_ij = PETSC_TRUE; 5261 mat->nonew = 0; 5262 } else if (scall == MAT_REUSE_MATRIX) { 5263 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5264 ci = mat->i; 5265 cj = mat->j; 5266 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5267 for (i = 0; i < am; i++) { 5268 /* off-diagonal portion of A */ 5269 ncols_o = bi[i + 1] - bi[i]; 5270 for (jo = 0; jo < ncols_o; jo++) { 5271 col = cmap[*bj]; 5272 if (col >= cstart) break; 5273 *cam++ = *ba++; 5274 bj++; 5275 } 5276 /* diagonal portion of A */ 5277 ncols_d = ai[i + 1] - ai[i]; 5278 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5279 /* off-diagonal portion of A */ 5280 for (j = jo; j < ncols_o; j++) { 5281 *cam++ = *ba++; 5282 bj++; 5283 } 5284 } 5285 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5286 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5287 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5288 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5289 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5290 PetscFunctionReturn(PETSC_SUCCESS); 5291 } 5292 5293 /*@ 5294 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5295 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5296 5297 Not Collective 5298 5299 Input Parameters: 5300 + A - the matrix 5301 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5302 5303 Output Parameters: 5304 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5305 - A_loc - the local sequential matrix generated 5306 5307 Level: developer 5308 5309 Note: 5310 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5311 part, then those associated with the off diagonal part (in its local ordering) 5312 5313 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5314 @*/ 5315 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5316 { 5317 Mat Ao, Ad; 5318 const PetscInt *cmap; 5319 PetscMPIInt size; 5320 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5321 5322 PetscFunctionBegin; 5323 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5324 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5325 if (size == 1) { 5326 if (scall == MAT_INITIAL_MATRIX) { 5327 PetscCall(PetscObjectReference((PetscObject)Ad)); 5328 *A_loc = Ad; 5329 } else if (scall == MAT_REUSE_MATRIX) { 5330 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5331 } 5332 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5333 PetscFunctionReturn(PETSC_SUCCESS); 5334 } 5335 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5336 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5337 if (f) { 5338 PetscCall((*f)(A, scall, glob, A_loc)); 5339 } else { 5340 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5341 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5342 Mat_SeqAIJ *c; 5343 PetscInt *ai = a->i, *aj = a->j; 5344 PetscInt *bi = b->i, *bj = b->j; 5345 PetscInt *ci, *cj; 5346 const PetscScalar *aa, *ba; 5347 PetscScalar *ca; 5348 PetscInt i, j, am, dn, on; 5349 5350 
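    /* default path: merge the diagonal and off-diagonal CSR data row by row; columns of Ad keep their local indices in [0,dn) and columns of Ao are appended after them by adding the offset dn */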
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5351 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5352 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5353 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5354 if (scall == MAT_INITIAL_MATRIX) { 5355 PetscInt k; 5356 PetscCall(PetscMalloc1(1 + am, &ci)); 5357 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5358 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5359 ci[0] = 0; 5360 for (i = 0, k = 0; i < am; i++) { 5361 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5362 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5363 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5364 /* diagonal portion of A */ 5365 for (j = 0; j < ncols_d; j++, k++) { 5366 cj[k] = *aj++; 5367 ca[k] = *aa++; 5368 } 5369 /* off-diagonal portion of A */ 5370 for (j = 0; j < ncols_o; j++, k++) { 5371 cj[k] = dn + *bj++; 5372 ca[k] = *ba++; 5373 } 5374 } 5375 /* put together the new matrix */ 5376 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5377 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5378 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5379 c = (Mat_SeqAIJ *)(*A_loc)->data; 5380 c->free_a = PETSC_TRUE; 5381 c->free_ij = PETSC_TRUE; 5382 c->nonew = 0; 5383 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5384 } else if (scall == MAT_REUSE_MATRIX) { 5385 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5386 for (i = 0; i < am; i++) { 5387 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5388 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5389 /* diagonal portion of A */ 5390 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5391 /* off-diagonal portion of A */ 5392 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5393 } 5394 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5395 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5396 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5397 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5398 if (glob) { 5399 PetscInt cst, *gidx; 5400 5401 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5402 PetscCall(PetscMalloc1(dn + on, &gidx)); 5403 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5404 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5405 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5406 } 5407 } 5408 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5409 PetscFunctionReturn(PETSC_SUCCESS); 5410 } 5411 5412 /*@C 5413 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5414 5415 Not Collective 5416 5417 Input Parameters: 5418 + A - the matrix 5419 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5420 . row - index set of rows to extract (or `NULL`) 5421 - col - index set of columns to extract (or `NULL`) 5422 5423 Output Parameter: 5424 . 
A_loc - the local sequential matrix generated 5425 5426 Level: developer 5427 5428 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5429 @*/ 5430 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5431 { 5432 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5433 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5434 IS isrowa, iscola; 5435 Mat *aloc; 5436 PetscBool match; 5437 5438 PetscFunctionBegin; 5439 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5440 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5441 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5442 if (!row) { 5443 start = A->rmap->rstart; 5444 end = A->rmap->rend; 5445 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5446 } else { 5447 isrowa = *row; 5448 } 5449 if (!col) { 5450 start = A->cmap->rstart; 5451 cmap = a->garray; 5452 nzA = a->A->cmap->n; 5453 nzB = a->B->cmap->n; 5454 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5455 ncols = 0; 5456 for (i = 0; i < nzB; i++) { 5457 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5458 else break; 5459 } 5460 imark = i; 5461 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5462 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5463 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5464 } else { 5465 iscola = *col; 5466 } 5467 if (scall != MAT_INITIAL_MATRIX) { 5468 PetscCall(PetscMalloc1(1, &aloc)); 5469 aloc[0] = *A_loc; 5470 } 5471 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5472 if (!col) { /* attach global id of condensed columns */ 5473 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5474 } 5475 *A_loc = aloc[0]; 5476 PetscCall(PetscFree(aloc)); 5477 if (!row) PetscCall(ISDestroy(&isrowa)); 5478 if (!col) PetscCall(ISDestroy(&iscola)); 5479 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5480 PetscFunctionReturn(PETSC_SUCCESS); 5481 } 5482 5483 /* 5484 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5485 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5486 * on a global size. 
5487 * */ 5488 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5489 { 5490 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5491 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5492 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5493 PetscMPIInt owner; 5494 PetscSFNode *iremote, *oiremote; 5495 const PetscInt *lrowindices; 5496 PetscSF sf, osf; 5497 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5498 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5499 MPI_Comm comm; 5500 ISLocalToGlobalMapping mapping; 5501 const PetscScalar *pd_a, *po_a; 5502 5503 PetscFunctionBegin; 5504 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5505 /* plocalsize is the number of roots 5506 * nrows is the number of leaves 5507 * */ 5508 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5509 PetscCall(ISGetLocalSize(rows, &nrows)); 5510 PetscCall(PetscCalloc1(nrows, &iremote)); 5511 PetscCall(ISGetIndices(rows, &lrowindices)); 5512 for (i = 0; i < nrows; i++) { 5513 /* Find a remote index and an owner for a row 5514 * The row could be local or remote 5515 * */ 5516 owner = 0; 5517 lidx = 0; 5518 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5519 iremote[i].index = lidx; 5520 iremote[i].rank = owner; 5521 } 5522 /* Create SF to communicate how many nonzero columns for each row */ 5523 PetscCall(PetscSFCreate(comm, &sf)); 5524 /* SF will figure out the number of nonzero colunms for each row, and their 5525 * offsets 5526 * */ 5527 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5528 PetscCall(PetscSFSetFromOptions(sf)); 5529 PetscCall(PetscSFSetUp(sf)); 5530 5531 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5532 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5533 PetscCall(PetscCalloc1(nrows, &pnnz)); 5534 roffsets[0] = 0; 5535 roffsets[1] = 0; 5536 for (i = 0; i < plocalsize; i++) { 5537 /* diag */ 5538 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5539 /* off diag */ 5540 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5541 /* compute offsets so that we relative location for each row */ 5542 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5543 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5544 } 5545 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5546 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5547 /* 'r' means root, and 'l' means leaf */ 5548 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5549 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5550 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5551 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5552 PetscCall(PetscSFDestroy(&sf)); 5553 PetscCall(PetscFree(roffsets)); 5554 PetscCall(PetscFree(nrcols)); 5555 dntotalcols = 0; 5556 ontotalcols = 0; 5557 ncol = 0; 5558 for (i = 0; i < nrows; i++) { 5559 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5560 ncol = PetscMax(pnnz[i], ncol); 5561 /* diag */ 5562 dntotalcols += nlcols[i * 2 + 0]; 5563 /* off diag */ 5564 ontotalcols += nlcols[i * 2 + 1]; 5565 } 5566 /* We do not need to figure the right number of columns 5567 * since all the calculations will be done by going through the raw data 5568 * */ 5569 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5570 PetscCall(MatSetUp(*P_oth)); 5571 PetscCall(PetscFree(pnnz)); 5572 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5573 /* diag */ 5574 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5575 /* off diag */ 5576 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5577 /* diag */ 5578 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5579 /* off diag */ 5580 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5581 dntotalcols = 0; 5582 ontotalcols = 0; 5583 ntotalcols = 0; 5584 for (i = 0; i < nrows; i++) { 5585 owner = 0; 5586 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5587 /* Set iremote for diag matrix */ 5588 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5589 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5590 iremote[dntotalcols].rank = owner; 5591 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5592 ilocal[dntotalcols++] = ntotalcols++; 5593 } 5594 /* off diag */ 5595 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5596 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5597 oiremote[ontotalcols].rank = owner; 5598 oilocal[ontotalcols++] = ntotalcols++; 5599 } 5600 } 5601 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5602 PetscCall(PetscFree(loffsets)); 5603 PetscCall(PetscFree(nlcols)); 5604 PetscCall(PetscSFCreate(comm, &sf)); 5605 /* P serves as roots and P_oth is leaves 5606 * Diag matrix 5607 * */ 5608 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5609 PetscCall(PetscSFSetFromOptions(sf)); 5610 PetscCall(PetscSFSetUp(sf)); 5611 5612 PetscCall(PetscSFCreate(comm, &osf)); 5613 /* Off diag */ 5614 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5615 PetscCall(PetscSFSetFromOptions(osf)); 5616 PetscCall(PetscSFSetUp(osf)); 5617 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5618 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5619 /* We operate on the matrix internal data for saving memory */ 5620 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5621 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5622 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5623 /* Convert to global indices for diag matrix */ 5624 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5625 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5626 /* We want P_oth store global indices */ 5627 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5628 /* Use memory scalable approach */ 5629 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5630 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5631 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5632 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5633 /* Convert back to local indices */ 5634 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5635 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5636 nout = 0; 5637 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5638 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5639 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5640 /* Exchange values */ 5641 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5642 
PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5643 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5644 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5645 /* Stop PETSc from shrinking memory */ 5646 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5647 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5648 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5649 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5650 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5651 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5652 PetscCall(PetscSFDestroy(&sf)); 5653 PetscCall(PetscSFDestroy(&osf)); 5654 PetscFunctionReturn(PETSC_SUCCESS); 5655 } 5656 5657 /* 5658 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5659 * This supports MPIAIJ and MAIJ 5660 * */ 5661 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5662 { 5663 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5664 Mat_SeqAIJ *p_oth; 5665 IS rows, map; 5666 PetscHMapI hamp; 5667 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5668 MPI_Comm comm; 5669 PetscSF sf, osf; 5670 PetscBool has; 5671 5672 PetscFunctionBegin; 5673 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5674 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5675 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5676 * and then create a submatrix (that often is an overlapping matrix) 5677 * */ 5678 if (reuse == MAT_INITIAL_MATRIX) { 5679 /* Use a hash table to figure out unique keys */ 5680 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5681 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5682 count = 0; 5683 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5684 for (i = 0; i < a->B->cmap->n; i++) { 5685 key = a->garray[i] / dof; 5686 PetscCall(PetscHMapIHas(hamp, key, &has)); 5687 if (!has) { 5688 mapping[i] = count; 5689 PetscCall(PetscHMapISet(hamp, key, count++)); 5690 } else { 5691 /* Current 'i' has the same value the previous step */ 5692 mapping[i] = count - 1; 5693 } 5694 } 5695 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5696 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5697 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5698 PetscCall(PetscCalloc1(htsize, &rowindices)); 5699 off = 0; 5700 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5701 PetscCall(PetscHMapIDestroy(&hamp)); 5702 PetscCall(PetscSortInt(htsize, rowindices)); 5703 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5704 /* In case, the matrix was already created but users want to recreate the matrix */ 5705 PetscCall(MatDestroy(P_oth)); 5706 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5707 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5708 PetscCall(ISDestroy(&map)); 5709 PetscCall(ISDestroy(&rows)); 5710 } else if (reuse == MAT_REUSE_MATRIX) { 5711 /* If matrix was already created, we simply update values using SF objects 5712 * that as attached to the matrix earlier. 
5713 */ 5714 const PetscScalar *pd_a, *po_a; 5715 5716 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5717 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5718 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5719 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5720 /* Update values in place */ 5721 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5722 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5723 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5724 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5725 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5726 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5727 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5728 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5729 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5730 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5731 PetscFunctionReturn(PETSC_SUCCESS); 5732 } 5733 5734 /*@C 5735 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5736 5737 Collective 5738 5739 Input Parameters: 5740 + A - the first matrix in `MATMPIAIJ` format 5741 . B - the second matrix in `MATMPIAIJ` format 5742 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5743 5744 Output Parameters: 5745 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5746 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5747 - B_seq - the sequential matrix generated 5748 5749 Level: developer 5750 5751 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5752 @*/ 5753 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5754 { 5755 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5756 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5757 IS isrowb, iscolb; 5758 Mat *bseq = NULL; 5759 5760 PetscFunctionBegin; 5761 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5762 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5763 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5764 5765 if (scall == MAT_INITIAL_MATRIX) { 5766 start = A->cmap->rstart; 5767 cmap = a->garray; 5768 nzA = a->A->cmap->n; 5769 nzB = a->B->cmap->n; 5770 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5771 ncols = 0; 5772 for (i = 0; i < nzB; i++) { /* row < local row index */ 5773 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5774 else break; 5775 } 5776 imark = i; 5777 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5778 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5779 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5780 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5781 } else { 5782 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5783 isrowb = *rowb; 5784 iscolb = *colb; 5785 PetscCall(PetscMalloc1(1, &bseq)); 5786 bseq[0] = *B_seq; 5787 } 5788 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5789 *B_seq = bseq[0]; 5790 PetscCall(PetscFree(bseq)); 
5791 if (!rowb) { 5792 PetscCall(ISDestroy(&isrowb)); 5793 } else { 5794 *rowb = isrowb; 5795 } 5796 if (!colb) { 5797 PetscCall(ISDestroy(&iscolb)); 5798 } else { 5799 *colb = iscolb; 5800 } 5801 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5802 PetscFunctionReturn(PETSC_SUCCESS); 5803 } 5804 5805 /* 5806 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5807 of the OFF-DIAGONAL portion of local A 5808 5809 Collective 5810 5811 Input Parameters: 5812 + A,B - the matrices in `MATMPIAIJ` format 5813 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5814 5815 Output Parameter: 5816 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5817 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5818 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5819 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5820 5821 Developer Note: 5822 This directly accesses information inside the VecScatter associated with the matrix-vector product 5823 for this matrix. This is not desirable.. 5824 5825 Level: developer 5826 5827 */ 5828 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5829 { 5830 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5831 Mat_SeqAIJ *b_oth; 5832 VecScatter ctx; 5833 MPI_Comm comm; 5834 const PetscMPIInt *rprocs, *sprocs; 5835 const PetscInt *srow, *rstarts, *sstarts; 5836 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5837 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5838 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5839 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5840 PetscMPIInt size, tag, rank, nreqs; 5841 5842 PetscFunctionBegin; 5843 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5844 PetscCallMPI(MPI_Comm_size(comm, &size)); 5845 5846 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5847 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5848 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5849 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5850 5851 if (size == 1) { 5852 startsj_s = NULL; 5853 bufa_ptr = NULL; 5854 *B_oth = NULL; 5855 PetscFunctionReturn(PETSC_SUCCESS); 5856 } 5857 5858 ctx = a->Mvctx; 5859 tag = ((PetscObject)ctx)->tag; 5860 5861 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5862 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5863 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5864 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5865 PetscCall(PetscMalloc1(nreqs, &reqs)); 5866 rwaits = reqs; 5867 swaits = reqs + nrecvs; 5868 5869 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5870 if (scall == MAT_INITIAL_MATRIX) { 5871 /* i-array */ 5872 /* post receives */ 5873 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), 
&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5874 for (i = 0; i < nrecvs; i++) { 5875 rowlen = rvalues + rstarts[i] * rbs; 5876 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5877 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5878 } 5879 5880 /* pack the outgoing message */ 5881 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5882 5883 sstartsj[0] = 0; 5884 rstartsj[0] = 0; 5885 len = 0; /* total length of j or a array to be sent */ 5886 if (nsends) { 5887 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5888 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5889 } 5890 for (i = 0; i < nsends; i++) { 5891 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5892 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5893 for (j = 0; j < nrows; j++) { 5894 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5895 for (l = 0; l < sbs; l++) { 5896 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5897 5898 rowlen[j * sbs + l] = ncols; 5899 5900 len += ncols; 5901 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5902 } 5903 k++; 5904 } 5905 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5906 5907 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5908 } 5909 /* recvs and sends of i-array are completed */ 5910 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5911 PetscCall(PetscFree(svalues)); 5912 5913 /* allocate buffers for sending j and a arrays */ 5914 PetscCall(PetscMalloc1(len + 1, &bufj)); 5915 PetscCall(PetscMalloc1(len + 1, &bufa)); 5916 5917 /* create i-array of B_oth */ 5918 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5919 5920 b_othi[0] = 0; 5921 len = 0; /* total length of j or a array to be received */ 5922 k = 0; 5923 for (i = 0; i < nrecvs; i++) { 5924 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5925 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5926 for (j = 0; j < nrows; j++) { 5927 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5928 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5929 k++; 5930 } 5931 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5932 } 5933 PetscCall(PetscFree(rvalues)); 5934 5935 /* allocate space for j and a arrays of B_oth */ 5936 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5937 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5938 5939 /* j-array */ 5940 /* post receives of j-array */ 5941 for (i = 0; i < nrecvs; i++) { 5942 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5943 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5944 } 5945 5946 /* pack the outgoing message j-array */ 5947 if (nsends) k = sstarts[0]; 5948 for (i = 0; i < nsends; i++) { 5949 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5950 bufJ = bufj + sstartsj[i]; 5951 for (j = 0; j < nrows; j++) { 5952 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5953 for (ll = 0; ll < sbs; ll++) { 5954 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5955 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5956 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5957 } 5958 } 5959 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, 
swaits + i)); 5960 } 5961 5962 /* recvs and sends of j-array are completed */ 5963 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5964 } else if (scall == MAT_REUSE_MATRIX) { 5965 sstartsj = *startsj_s; 5966 rstartsj = *startsj_r; 5967 bufa = *bufa_ptr; 5968 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5969 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5970 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5971 5972 /* a-array */ 5973 /* post receives of a-array */ 5974 for (i = 0; i < nrecvs; i++) { 5975 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5976 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5977 } 5978 5979 /* pack the outgoing message a-array */ 5980 if (nsends) k = sstarts[0]; 5981 for (i = 0; i < nsends; i++) { 5982 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5983 bufA = bufa + sstartsj[i]; 5984 for (j = 0; j < nrows; j++) { 5985 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5986 for (ll = 0; ll < sbs; ll++) { 5987 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5988 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5989 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5990 } 5991 } 5992 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5993 } 5994 /* recvs and sends of a-array are completed */ 5995 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5996 PetscCall(PetscFree(reqs)); 5997 5998 if (scall == MAT_INITIAL_MATRIX) { 5999 /* put together the new matrix */ 6000 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6001 6002 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6003 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6004 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6005 b_oth->free_a = PETSC_TRUE; 6006 b_oth->free_ij = PETSC_TRUE; 6007 b_oth->nonew = 0; 6008 6009 PetscCall(PetscFree(bufj)); 6010 if (!startsj_s || !bufa_ptr) { 6011 PetscCall(PetscFree2(sstartsj, rstartsj)); 6012 PetscCall(PetscFree(bufa)); 6013 } else { 6014 *startsj_s = sstartsj; 6015 *startsj_r = rstartsj; 6016 *bufa_ptr = bufa; 6017 } 6018 } else if (scall == MAT_REUSE_MATRIX) { 6019 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6020 } 6021 6022 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6023 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6024 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6025 PetscFunctionReturn(PETSC_SUCCESS); 6026 } 6027 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6031 #if defined(PETSC_HAVE_MKL_SPARSE) 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6033 #endif 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6036 #if defined(PETSC_HAVE_ELEMENTAL) 6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6038 #endif 6039 #if defined(PETSC_HAVE_SCALAPACK) 6040 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6041 #endif 6042 #if defined(PETSC_HAVE_HYPRE) 6043 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_CUDA) 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_HIP) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6055 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6056 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6057 6058 /* 6059 Computes (B'*A')' since computing B*A directly is untenable 6060 6061 n p p 6062 [ ] [ ] [ ] 6063 m [ A ] * n [ B ] = m [ C ] 6064 [ ] [ ] [ ] 6065 6066 */ 6067 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6068 { 6069 Mat At, Bt, Ct; 6070 6071 PetscFunctionBegin; 6072 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6073 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6074 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6075 PetscCall(MatDestroy(&At)); 6076 PetscCall(MatDestroy(&Bt)); 6077 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6078 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6079 PetscCall(MatDestroy(&Ct)); 6080 PetscFunctionReturn(PETSC_SUCCESS); 6081 } 6082 6083 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6084 { 6085 PetscBool cisdense; 6086 6087 PetscFunctionBegin; 6088 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF,
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6089 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6090 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6091 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6092 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6093 PetscCall(MatSetUp(C)); 6094 6095 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6096 PetscFunctionReturn(PETSC_SUCCESS); 6097 } 6098 6099 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6100 { 6101 Mat_Product *product = C->product; 6102 Mat A = product->A, B = product->B; 6103 6104 PetscFunctionBegin; 6105 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6106 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6107 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6108 C->ops->productsymbolic = MatProductSymbolic_AB; 6109 PetscFunctionReturn(PETSC_SUCCESS); 6110 } 6111 6112 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6113 { 6114 Mat_Product *product = C->product; 6115 6116 PetscFunctionBegin; 6117 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6118 PetscFunctionReturn(PETSC_SUCCESS); 6119 } 6120 6121 /* 6122 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6123 6124 Input Parameters: 6125 6126 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6127 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6128 6129 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6130 6131 For Set1, j1[] contains column indices of the nonzeros. 6132 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6133 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6134 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6135 6136 Similar for Set2. 6137 6138 This routine merges the two sets of nonzeros row by row and removes repeats. 6139 6140 Output Parameters: (memory is allocated by the caller) 6141 6142 i[],j[]: the CSR of the merged matrix, which has m rows. 6143 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6144 imap2[]: similar to imap1[], but for Set2. 6145 Note we order nonzeros row-by-row and from left to right. 
6146 */ 6147 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6148 { 6149 PetscInt r, m; /* Row index of mat */ 6150 PetscCount t, t1, t2, b1, e1, b2, e2; 6151 6152 PetscFunctionBegin; 6153 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6154 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6155 i[0] = 0; 6156 for (r = 0; r < m; r++) { /* Do row by row merging */ 6157 b1 = rowBegin1[r]; 6158 e1 = rowEnd1[r]; 6159 b2 = rowBegin2[r]; 6160 e2 = rowEnd2[r]; 6161 while (b1 < e1 && b2 < e2) { 6162 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6163 j[t] = j1[b1]; 6164 imap1[t1] = t; 6165 imap2[t2] = t; 6166 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6167 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6168 t1++; 6169 t2++; 6170 t++; 6171 } else if (j1[b1] < j2[b2]) { 6172 j[t] = j1[b1]; 6173 imap1[t1] = t; 6174 b1 += jmap1[t1 + 1] - jmap1[t1]; 6175 t1++; 6176 t++; 6177 } else { 6178 j[t] = j2[b2]; 6179 imap2[t2] = t; 6180 b2 += jmap2[t2 + 1] - jmap2[t2]; 6181 t2++; 6182 t++; 6183 } 6184 } 6185 /* Merge the remaining in either j1[] or j2[] */ 6186 while (b1 < e1) { 6187 j[t] = j1[b1]; 6188 imap1[t1] = t; 6189 b1 += jmap1[t1 + 1] - jmap1[t1]; 6190 t1++; 6191 t++; 6192 } 6193 while (b2 < e2) { 6194 j[t] = j2[b2]; 6195 imap2[t2] = t; 6196 b2 += jmap2[t2 + 1] - jmap2[t2]; 6197 t2++; 6198 t++; 6199 } 6200 i[r + 1] = t; 6201 } 6202 PetscFunctionReturn(PETSC_SUCCESS); 6203 } 6204 6205 /* 6206 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6207 6208 Input Parameters: 6209 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6210 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6211 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6212 6213 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6214 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6215 6216 Output Parameters: 6217 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6218 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6219 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6220 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6221 6222 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6223 Atot: number of entries belonging to the diagonal block. 6224 Annz: number of unique nonzeros belonging to the diagonal block. 6225 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6226 repeats (i.e., same 'i,j' pair). 6227 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6228 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6229 6230 Atot: number of entries belonging to the diagonal block 6231 Annz: number of unique nonzeros belonging to the diagonal block. 6232 6233 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6234 6235 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6236 */ 6237 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6238 { 6239 PetscInt cstart, cend, rstart, rend, row, col; 6240 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6241 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6242 PetscCount k, m, p, q, r, s, mid; 6243 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6244 6245 PetscFunctionBegin; 6246 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6247 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6248 m = rend - rstart; 6249 6250 /* Skip negative rows */ 6251 for (k = 0; k < n; k++) 6252 if (i[k] >= 0) break; 6253 6254 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6255 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6256 */ 6257 while (k < n) { 6258 row = i[k]; 6259 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6260 for (s = k; s < n; s++) 6261 if (i[s] != row) break; 6262 6263 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6264 for (p = k; p < s; p++) { 6265 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6266 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6267 } 6268 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6269 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6270 rowBegin[row - rstart] = k; 6271 rowMid[row - rstart] = mid; 6272 rowEnd[row - rstart] = s; 6273 6274 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6275 Atot += mid - k; 6276 Btot += s - mid; 6277 6278 /* Count unique nonzeros of this diag row */ 6279 for (p = k; p < mid;) { 6280 col = j[p]; 6281 do { 6282 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6283 p++; 6284 } while (p < mid && j[p] == col); 6285 Annz++; 6286 } 6287 6288 /* Count unique nonzeros of this offdiag row */ 6289 for (p = mid; p < s;) { 6290 col = j[p]; 6291 do { 6292 p++; 6293 } while (p < s && j[p] == col); 6294 Bnnz++; 6295 } 6296 k = s; 6297 } 6298 6299 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6300 PetscCall(PetscMalloc1(Atot, &Aperm)); 6301 PetscCall(PetscMalloc1(Btot, &Bperm)); 6302 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6303 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6304 6305 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6306 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6307 for (r = 0; r < m; r++) { 6308 k = rowBegin[r]; 6309 mid 
= rowMid[r]; 6310 s = rowEnd[r]; 6311 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6312 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6313 Atot += mid - k; 6314 Btot += s - mid; 6315 6316 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6317 for (p = k; p < mid;) { 6318 col = j[p]; 6319 q = p; 6320 do { 6321 p++; 6322 } while (p < mid && j[p] == col); 6323 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6324 Annz++; 6325 } 6326 6327 for (p = mid; p < s;) { 6328 col = j[p]; 6329 q = p; 6330 do { 6331 p++; 6332 } while (p < s && j[p] == col); 6333 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6334 Bnnz++; 6335 } 6336 } 6337 /* Output */ 6338 *Aperm_ = Aperm; 6339 *Annz_ = Annz; 6340 *Atot_ = Atot; 6341 *Ajmap_ = Ajmap; 6342 *Bperm_ = Bperm; 6343 *Bnnz_ = Bnnz; 6344 *Btot_ = Btot; 6345 *Bjmap_ = Bjmap; 6346 PetscFunctionReturn(PETSC_SUCCESS); 6347 } 6348 6349 /* 6350 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6351 6352 Input Parameters: 6353 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6354 nnz: number of unique nonzeros in the merged matrix 6355 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6356 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6357 6358 Output Parameter: (memory is allocated by the caller) 6359 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6360 6361 Example: 6362 nnz1 = 4 6363 nnz = 6 6364 imap = [1,3,4,5] 6365 jmap = [0,3,5,6,7] 6366 then, 6367 jmap_new = [0,0,3,3,5,6,7] 6368 */ 6369 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6370 { 6371 PetscCount k, p; 6372 6373 PetscFunctionBegin; 6374 jmap_new[0] = 0; 6375 p = nnz; /* p loops over jmap_new[] backwards */ 6376 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6377 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6378 } 6379 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6380 PetscFunctionReturn(PETSC_SUCCESS); 6381 } 6382 6383 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6384 { 6385 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6386 6387 PetscFunctionBegin; 6388 PetscCall(PetscSFDestroy(&coo->sf)); 6389 PetscCall(PetscFree(coo->Aperm1)); 6390 PetscCall(PetscFree(coo->Bperm1)); 6391 PetscCall(PetscFree(coo->Ajmap1)); 6392 PetscCall(PetscFree(coo->Bjmap1)); 6393 PetscCall(PetscFree(coo->Aimap2)); 6394 PetscCall(PetscFree(coo->Bimap2)); 6395 PetscCall(PetscFree(coo->Aperm2)); 6396 PetscCall(PetscFree(coo->Bperm2)); 6397 PetscCall(PetscFree(coo->Ajmap2)); 6398 PetscCall(PetscFree(coo->Bjmap2)); 6399 PetscCall(PetscFree(coo->Cperm1)); 6400 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6401 PetscCall(PetscFree(coo)); 6402 PetscFunctionReturn(PETSC_SUCCESS); 6403 } 6404 6405 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6406 { 6407 MPI_Comm comm; 6408 PetscMPIInt rank, size; 6409 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6410 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6411 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6412 PetscContainer container; 6413 MatCOOStruct_MPIAIJ *coo; 6414 6415 PetscFunctionBegin; 6416 PetscCall(PetscFree(mpiaij->garray)); 6417 
PetscCall(VecDestroy(&mpiaij->lvec)); 6418 #if defined(PETSC_USE_CTABLE) 6419 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6420 #else 6421 PetscCall(PetscFree(mpiaij->colmap)); 6422 #endif 6423 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6424 mat->assembled = PETSC_FALSE; 6425 mat->was_assembled = PETSC_FALSE; 6426 6427 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6428 PetscCallMPI(MPI_Comm_size(comm, &size)); 6429 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6430 PetscCall(PetscLayoutSetUp(mat->rmap)); 6431 PetscCall(PetscLayoutSetUp(mat->cmap)); 6432 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6433 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6434 PetscCall(MatGetLocalSize(mat, &m, &n)); 6435 PetscCall(MatGetSize(mat, &M, &N)); 6436 6437 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6438 /* entries come first, then local rows, then remote rows. */ 6439 PetscCount n1 = coo_n, *perm1; 6440 PetscInt *i1 = coo_i, *j1 = coo_j; 6441 6442 PetscCall(PetscMalloc1(n1, &perm1)); 6443 for (k = 0; k < n1; k++) perm1[k] = k; 6444 6445 /* Manipulate indices so that entries with negative row or col indices will have smallest 6446 row indices, local entries will have greater but negative row indices, and remote entries 6447 will have positive row indices. 6448 */ 6449 for (k = 0; k < n1; k++) { 6450 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6451 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6452 else { 6453 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6454 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6455 } 6456 } 6457 6458 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6459 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6460 6461 /* Advance k to the first entry we need to take care of */ 6462 for (k = 0; k < n1; k++) 6463 if (i1[k] > PETSC_MIN_INT) break; 6464 PetscInt i1start = k; 6465 6466 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6467 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6468 6469 /* Send remote rows to their owner */ 6470 /* Find which rows should be sent to which remote ranks*/ 6471 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6472 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6473 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6474 const PetscInt *ranges; 6475 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6476 6477 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6478 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6479 for (k = rem; k < n1;) { 6480 PetscMPIInt owner; 6481 PetscInt firstRow, lastRow; 6482 6483 /* Locate a row range */ 6484 firstRow = i1[k]; /* first row of this owner */ 6485 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6486 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6487 6488 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6489 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6490 6491 /* All entries in [k,p) belong to this remote owner */ 6492 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6493 PetscMPIInt *sendto2; 6494 PetscInt *nentries2; 6495 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6496 6497 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6498 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6499 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6500 PetscCall(PetscFree2(sendto, nentries)); 6501 sendto = sendto2; 6502 nentries = nentries2; 6503 maxNsend = maxNsend2; 6504 } 6505 sendto[nsend] = owner; 6506 nentries[nsend] = p - k; 6507 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6508 nsend++; 6509 k = p; 6510 } 6511 6512 /* Build 1st SF to know offsets on remote to send data */ 6513 PetscSF sf1; 6514 PetscInt nroots = 1, nroots2 = 0; 6515 PetscInt nleaves = nsend, nleaves2 = 0; 6516 PetscInt *offsets; 6517 PetscSFNode *iremote; 6518 6519 PetscCall(PetscSFCreate(comm, &sf1)); 6520 PetscCall(PetscMalloc1(nsend, &iremote)); 6521 PetscCall(PetscMalloc1(nsend, &offsets)); 6522 for (k = 0; k < nsend; k++) { 6523 iremote[k].rank = sendto[k]; 6524 iremote[k].index = 0; 6525 nleaves2 += nentries[k]; 6526 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6527 } 6528 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6529 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6530 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the check on offsets[] below catches it */ 6531 PetscCall(PetscSFDestroy(&sf1)); 6532 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6533 6534 /* Build 2nd SF to send remote COOs to their owner */ 6535 PetscSF sf2; 6536 nroots = nroots2; 6537 nleaves = nleaves2; 6538 PetscCall(PetscSFCreate(comm, &sf2)); 6539 PetscCall(PetscSFSetFromOptions(sf2)); 6540 PetscCall(PetscMalloc1(nleaves, &iremote)); 6541 p = 0; 6542 for (k = 0; k < nsend; k++) { 6543 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6544 for (q = 0; q < nentries[k]; q++, p++) { 6545 iremote[p].rank = sendto[k]; 6546 iremote[p].index = offsets[k] + q; 6547 } 6548 } 6549 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6550 6551 /* Send the remote COOs to their owner */ 6552 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6553 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6554 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6555 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6556 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6558 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6559 6560 PetscCall(PetscFree(offsets)); 6561 PetscCall(PetscFree2(sendto, nentries)); 6562 6563 /* Sort received COOs by row along with the permutation array */ 6564 for (k = 0; k < n2; k++) perm2[k] = k; 6565 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6566 6567 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6568 PetscCount *Cperm1; 6569 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6570 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6571 6572 /* Support for HYPRE matrices, kind of a hack. 6573 Swap min column with diagonal so that diagonal values will go first */ 6574 PetscBool hypre; 6575 const char *name; 6576 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6577 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6578 if (hypre) { 6579 PetscInt *minj; 6580 PetscBT hasdiag; 6581 6582 PetscCall(PetscBTCreate(m, &hasdiag)); 6583 PetscCall(PetscMalloc1(m, &minj)); 6584 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6585 for (k = i1start; k < rem; k++) { 6586 if (j1[k] < cstart || j1[k] >= cend) continue; 6587 const PetscInt rindex = i1[k] - rstart; 6588 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6589 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6590 } 6591 for (k = 0; k < n2; k++) { 6592 if (j2[k] < cstart || j2[k] >= cend) continue; 6593 const PetscInt rindex = i2[k] - rstart; 6594 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6595 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6596 } 6597 for (k = i1start; k < rem; k++) { 6598 const PetscInt rindex = i1[k] - rstart; 6599 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6600 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6601 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6602 } 6603 for (k = 0; k < n2; k++) { 6604 const PetscInt rindex = i2[k] - rstart; 6605 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6606 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6607 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6608 } 6609 PetscCall(PetscBTDestroy(&hasdiag)); 6610 PetscCall(PetscFree(minj)); 6611 } 6612 6613 /* Split local COOs and received COOs into diag/offdiag portions */ 6614 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6615 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6616 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6617 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6618 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6619 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6620 6621 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6622 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6623 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, 
&Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6624 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6625 6626 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6627 PetscInt *Ai, *Bi; 6628 PetscInt *Aj, *Bj; 6629 6630 PetscCall(PetscMalloc1(m + 1, &Ai)); 6631 PetscCall(PetscMalloc1(m + 1, &Bi)); 6632 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6633 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6634 6635 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6636 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6637 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6638 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6639 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6640 6641 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6642 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6643 6644 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6645 /* expect nonzeros in A/B most likely have local contributing entries */ 6646 PetscInt Annz = Ai[m]; 6647 PetscInt Bnnz = Bi[m]; 6648 PetscCount *Ajmap1_new, *Bjmap1_new; 6649 6650 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6651 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6652 6653 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6654 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6655 6656 PetscCall(PetscFree(Aimap1)); 6657 PetscCall(PetscFree(Ajmap1)); 6658 PetscCall(PetscFree(Bimap1)); 6659 PetscCall(PetscFree(Bjmap1)); 6660 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6661 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6662 PetscCall(PetscFree(perm1)); 6663 PetscCall(PetscFree3(i2, j2, perm2)); 6664 6665 Ajmap1 = Ajmap1_new; 6666 Bjmap1 = Bjmap1_new; 6667 6668 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6669 if (Annz < Annz1 + Annz2) { 6670 PetscInt *Aj_new; 6671 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6672 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6673 PetscCall(PetscFree(Aj)); 6674 Aj = Aj_new; 6675 } 6676 6677 if (Bnnz < Bnnz1 + Bnnz2) { 6678 PetscInt *Bj_new; 6679 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6680 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6681 PetscCall(PetscFree(Bj)); 6682 Bj = Bj_new; 6683 } 6684 6685 /* Create new submatrices for on-process and off-process coupling */ 6686 PetscScalar *Aa, *Ba; 6687 MatType rtype; 6688 Mat_SeqAIJ *a, *b; 6689 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6690 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6691 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6692 if (cstart) { 6693 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6694 } 6695 PetscCall(MatDestroy(&mpiaij->A)); 6696 PetscCall(MatDestroy(&mpiaij->B)); 6697 PetscCall(MatGetRootType_Private(mat, &rtype)); 6698 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6699 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6700 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6701 6702 a = (Mat_SeqAIJ *)mpiaij->A->data; 6703 b = (Mat_SeqAIJ *)mpiaij->B->data; 6704 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6705 a->free_a = b->free_a 
= PETSC_TRUE; 6706 a->free_ij = b->free_ij = PETSC_TRUE; 6707 6708 /* conversion must happen AFTER multiply setup */ 6709 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6710 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6711 PetscCall(VecDestroy(&mpiaij->lvec)); 6712 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6713 6714 // Put the COO struct in a container and then attach that to the matrix 6715 PetscCall(PetscMalloc1(1, &coo)); 6716 coo->n = coo_n; 6717 coo->sf = sf2; 6718 coo->sendlen = nleaves; 6719 coo->recvlen = nroots; 6720 coo->Annz = Annz; 6721 coo->Bnnz = Bnnz; 6722 coo->Annz2 = Annz2; 6723 coo->Bnnz2 = Bnnz2; 6724 coo->Atot1 = Atot1; 6725 coo->Atot2 = Atot2; 6726 coo->Btot1 = Btot1; 6727 coo->Btot2 = Btot2; 6728 coo->Ajmap1 = Ajmap1; 6729 coo->Aperm1 = Aperm1; 6730 coo->Bjmap1 = Bjmap1; 6731 coo->Bperm1 = Bperm1; 6732 coo->Aimap2 = Aimap2; 6733 coo->Ajmap2 = Ajmap2; 6734 coo->Aperm2 = Aperm2; 6735 coo->Bimap2 = Bimap2; 6736 coo->Bjmap2 = Bjmap2; 6737 coo->Bperm2 = Bperm2; 6738 coo->Cperm1 = Cperm1; 6739 // Allocate in preallocation. If not used, it has zero cost on host 6740 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6741 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6742 PetscCall(PetscContainerSetPointer(container, coo)); 6743 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6744 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6745 PetscCall(PetscContainerDestroy(&container)); 6746 PetscFunctionReturn(PETSC_SUCCESS); 6747 } 6748 6749 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6750 { 6751 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6752 Mat A = mpiaij->A, B = mpiaij->B; 6753 PetscScalar *Aa, *Ba; 6754 PetscScalar *sendbuf, *recvbuf; 6755 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6756 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6757 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6758 const PetscCount *Cperm1; 6759 PetscContainer container; 6760 MatCOOStruct_MPIAIJ *coo; 6761 6762 PetscFunctionBegin; 6763 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6764 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6765 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6766 sendbuf = coo->sendbuf; 6767 recvbuf = coo->recvbuf; 6768 Ajmap1 = coo->Ajmap1; 6769 Ajmap2 = coo->Ajmap2; 6770 Aimap2 = coo->Aimap2; 6771 Bjmap1 = coo->Bjmap1; 6772 Bjmap2 = coo->Bjmap2; 6773 Bimap2 = coo->Bimap2; 6774 Aperm1 = coo->Aperm1; 6775 Aperm2 = coo->Aperm2; 6776 Bperm1 = coo->Bperm1; 6777 Bperm2 = coo->Bperm2; 6778 Cperm1 = coo->Cperm1; 6779 6780 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6781 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6782 6783 /* Pack entries to be sent to remote */ 6784 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6785 6786 /* Send remote entries to their owner and overlap the communication with local computation */ 6787 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6788 /* Add local entries to A and B */ 6789 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6790 
PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6791 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6792 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6793 } 6794 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6795 PetscScalar sum = 0.0; 6796 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6797 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6798 } 6799 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6800 6801 /* Add received remote entries to A and B */ 6802 for (PetscCount i = 0; i < coo->Annz2; i++) { 6803 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6804 } 6805 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6806 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6807 } 6808 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6809 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6810 PetscFunctionReturn(PETSC_SUCCESS); 6811 } 6812 6813 /*MC 6814 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6815 6816 Options Database Keys: 6817 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6818 6819 Level: beginner 6820 6821 Notes: 6822 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6823 in this case the values associated with the rows and columns one passes in are set to zero 6824 in the matrix 6825 6826 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6827 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6828 6829 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6830 M*/ 6831 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6832 { 6833 Mat_MPIAIJ *b; 6834 PetscMPIInt size; 6835 6836 PetscFunctionBegin; 6837 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6838 6839 PetscCall(PetscNew(&b)); 6840 B->data = (void *)b; 6841 B->ops[0] = MatOps_Values; 6842 B->assembled = PETSC_FALSE; 6843 B->insertmode = NOT_SET_VALUES; 6844 b->size = size; 6845 6846 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6847 6848 /* build cache for off array entries formed */ 6849 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6850 6851 b->donotstash = PETSC_FALSE; 6852 b->colmap = NULL; 6853 b->garray = NULL; 6854 b->roworiented = PETSC_TRUE; 6855 6856 /* stuff used for matrix vector multiply */ 6857 b->lvec = NULL; 6858 b->Mvctx = NULL; 6859 6860 /* stuff for MatGetRow() */ 6861 b->rowindices = NULL; 6862 b->rowvalues = NULL; 6863 b->getrowactive = PETSC_FALSE; 6864 6865 /* flexible pointer used in CUSPARSE classes */ 6866 b->spptr = NULL; 6867 6868 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6869 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6870 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6871 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6872 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6873 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6875 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6876 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6877 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6878 #if defined(PETSC_HAVE_CUDA) 6879 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6880 #endif 6881 #if defined(PETSC_HAVE_HIP) 6882 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6883 #endif 6884 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6885 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6886 #endif 6887 #if defined(PETSC_HAVE_MKL_SPARSE) 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6889 #endif 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6894 #if defined(PETSC_HAVE_ELEMENTAL) 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6896 #endif 6897 #if defined(PETSC_HAVE_SCALAPACK) 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6899 #endif 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6902 #if defined(PETSC_HAVE_HYPRE) 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6905 #endif 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6910 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6911 PetscFunctionReturn(PETSC_SUCCESS); 6912 } 6913 6914 /*@C 6915 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6916 and "off-diagonal" part of the matrix in CSR format. 
6917 6918 Collective 6919 6920 Input Parameters: 6921 + comm - MPI communicator 6922 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6923 . n - This value should be the same as the local size used in creating the 6924 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have it 6925 calculated if `N` is given) For square matrices `n` is almost always `m`. 6926 . M - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given) 6927 . N - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given) 6928 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6929 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6930 . a - matrix values 6931 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6932 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6933 - oa - matrix values 6934 6935 Output Parameter: 6936 . mat - the matrix 6937 6938 Level: advanced 6939 6940 Notes: 6941 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6942 must free the arrays once the matrix has been destroyed and not before. 6943 6944 The `i` and `j` indices are 0 based 6945 6946 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6947 6948 This sets local rows and cannot be used to set off-processor values. 6949 6950 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6951 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6952 not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because 6953 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6954 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6955 communication if it is known that only local entries will be set.
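   Example Usage:
   The following is a minimal sketch; the CSR arrays i, j, a, oi, oj, oa and the local sizes m, n are assumed to have
   been built by the caller and are purely illustrative:
.vb
   Mat A;
   /* i,j,a describe the diagonal block with LOCAL column indices;
      oi,oj,oa describe the off-diagonal block with GLOBAL column indices */
   PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, m, n, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A));
   /* ... use A ... */
   PetscCall(MatDestroy(&A));
   /* only after the destroy may the caller free i, j, a, oi, oj, oa */
.ve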
6956 6957 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6958 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6959 @*/ 6960 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6961 { 6962 Mat_MPIAIJ *maij; 6963 6964 PetscFunctionBegin; 6965 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6966 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6967 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6968 PetscCall(MatCreate(comm, mat)); 6969 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6970 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6971 maij = (Mat_MPIAIJ *)(*mat)->data; 6972 6973 (*mat)->preallocated = PETSC_TRUE; 6974 6975 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6976 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6977 6978 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6979 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6980 6981 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6982 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6983 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6984 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6985 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6986 PetscFunctionReturn(PETSC_SUCCESS); 6987 } 6988 6989 typedef struct { 6990 Mat *mp; /* intermediate products */ 6991 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6992 PetscInt cp; /* number of intermediate products */ 6993 6994 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6995 PetscInt *startsj_s, *startsj_r; 6996 PetscScalar *bufa; 6997 Mat P_oth; 6998 6999 /* may take advantage of merging product->B */ 7000 Mat Bloc; /* B-local by merging diag and off-diag */ 7001 7002 /* cusparse does not have support to split between symbolic and numeric phases. 7003 When api_user is true, we don't need to update the numerical values 7004 of the temporary storage */ 7005 PetscBool reusesym; 7006 7007 /* support for COO values insertion */ 7008 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7009 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7010 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7011 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7012 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7013 PetscMemType mtype; 7014 7015 /* customization */ 7016 PetscBool abmerge; 7017 PetscBool P_oth_bind; 7018 } MatMatMPIAIJBACKEND; 7019 7020 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7021 { 7022 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7023 PetscInt i; 7024 7025 PetscFunctionBegin; 7026 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7027 PetscCall(PetscFree(mmdata->bufa)); 7028 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7029 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7030 PetscCall(MatDestroy(&mmdata->P_oth)); 7031 PetscCall(MatDestroy(&mmdata->Bloc)); 7032 PetscCall(PetscSFDestroy(&mmdata->sf)); 7033 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7034 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7035 PetscCall(PetscFree(mmdata->own[0])); 7036 PetscCall(PetscFree(mmdata->own)); 7037 PetscCall(PetscFree(mmdata->off[0])); 7038 PetscCall(PetscFree(mmdata->off)); 7039 PetscCall(PetscFree(mmdata)); 7040 PetscFunctionReturn(PETSC_SUCCESS); 7041 } 7042 7043 /* Copy selected n entries with indices in idx[] of A to v[]. 7044 If idx is NULL, copy the whole data array of A to v[] 7045 */ 7046 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7047 { 7048 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7049 7050 PetscFunctionBegin; 7051 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7052 if (f) { 7053 PetscCall((*f)(A, n, idx, v)); 7054 } else { 7055 const PetscScalar *vv; 7056 7057 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7058 if (n && idx) { 7059 PetscScalar *w = v; 7060 const PetscInt *oi = idx; 7061 PetscInt j; 7062 7063 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7064 } else { 7065 PetscCall(PetscArraycpy(v, vv, n)); 7066 } 7067 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7068 } 7069 PetscFunctionReturn(PETSC_SUCCESS); 7070 } 7071 7072 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7073 { 7074 MatMatMPIAIJBACKEND *mmdata; 7075 PetscInt i, n_d, n_o; 7076 7077 PetscFunctionBegin; 7078 MatCheckProduct(C, 1); 7079 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7080 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7081 if (!mmdata->reusesym) { /* update temporary matrices */ 7082 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7083 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7084 } 7085 mmdata->reusesym = PETSC_FALSE; 7086 7087 for (i = 0; i < mmdata->cp; i++) { 7088 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7089 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7090 } 7091 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7092 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7093 7094 if (mmdata->mptmp[i]) continue; 7095 if (noff) { 7096 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7097 7098 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7099 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7100 n_o += noff; 7101 n_d += nown; 7102 } else { 7103 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7104 7105 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7106 n_d += mm->nz; 7107 } 7108 } 7109 if (mmdata->hasoffproc) { /* offprocess insertion */ 7110 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7111 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7112 } 7113 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7114 PetscFunctionReturn(PETSC_SUCCESS); 7115 } 7116 7117 /* Support for Pt * A, A * P, or Pt * A * P */ 7118 #define MAX_NUMBER_INTERMEDIATE 4 7119 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7120 { 7121 Mat_Product *product = C->product; 7122 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7123 Mat_MPIAIJ *a, *p; 7124 MatMatMPIAIJBACKEND *mmdata; 7125 ISLocalToGlobalMapping P_oth_l2g = NULL; 7126 IS glob = NULL; 7127 const char *prefix; 7128 char pprefix[256]; 7129 const PetscInt *globidx, *P_oth_idx; 7130 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7131 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7132 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7133 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7134 /* a base offset; type-2: sparse with a local to global map table */ 7135 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7136 7137 MatProductType ptype; 7138 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7139 PetscMPIInt size; 7140 7141 PetscFunctionBegin; 7142 MatCheckProduct(C, 1); 7143 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7144 ptype = product->type; 7145 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7146 ptype = MATPRODUCT_AB; 7147 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7148 } 7149 switch (ptype) { 7150 case MATPRODUCT_AB: 7151 A = product->A; 7152 P = product->B; 7153 m = A->rmap->n; 7154 n = P->cmap->n; 7155 M = A->rmap->N; 7156 N = P->cmap->N; 7157 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7158 break; 7159 case MATPRODUCT_AtB: 7160 P = product->A; 7161 A = product->B; 7162 m = P->cmap->n; 7163 n = A->cmap->n; 7164 M = P->cmap->N; 7165 N = A->cmap->N; 7166 hasoffproc = PETSC_TRUE; 7167 break; 7168 case MATPRODUCT_PtAP: 7169 A = product->A; 7170 P = product->B; 7171 m = P->cmap->n; 7172 n = P->cmap->n; 7173 M = P->cmap->N; 7174 N = P->cmap->N; 7175 hasoffproc = PETSC_TRUE; 7176 break; 7177 default: 7178 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7179 } 7180 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7181 if (size == 1) hasoffproc = PETSC_FALSE; 7182 7183 /* defaults */ 7184 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7185 mp[i] = NULL; 7186 mptmp[i] = PETSC_FALSE; 7187 rmapt[i] = -1; 7188 cmapt[i] = -1; 7189 rmapa[i] = NULL; 7190 cmapa[i] = NULL; 7191 } 7192 7193 /* customization */ 7194 PetscCall(PetscNew(&mmdata)); 7195 mmdata->reusesym = product->api_user; 7196 if (ptype == MATPRODUCT_AB) { 7197 if (product->api_user) { 7198 
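/* product->api_user: the product was requested through MatMatMult(), so the -matmatmult_backend_* option names are used */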
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7199 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7200 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7201 PetscOptionsEnd(); 7202 } else { 7203 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7204 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7205 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7206 PetscOptionsEnd(); 7207 } 7208 } else if (ptype == MATPRODUCT_PtAP) { 7209 if (product->api_user) { 7210 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7211 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7212 PetscOptionsEnd(); 7213 } else { 7214 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7215 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7216 PetscOptionsEnd(); 7217 } 7218 } 7219 a = (Mat_MPIAIJ *)A->data; 7220 p = (Mat_MPIAIJ *)P->data; 7221 PetscCall(MatSetSizes(C, m, n, M, N)); 7222 PetscCall(PetscLayoutSetUp(C->rmap)); 7223 PetscCall(PetscLayoutSetUp(C->cmap)); 7224 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7225 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7226 7227 cp = 0; 7228 switch (ptype) { 7229 case MATPRODUCT_AB: /* A * P */ 7230 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7231 7232 /* A_diag * P_local (merged or not) */ 7233 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7234 /* P is product->B */ 7235 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7236 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7237 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7238 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7239 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7240 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7241 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7242 mp[cp]->product->api_user = product->api_user; 7243 PetscCall(MatProductSetFromOptions(mp[cp])); 7244 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7245 PetscCall(ISGetIndices(glob, &globidx)); 7246 rmapt[cp] = 1; 7247 cmapt[cp] = 2; 7248 cmapa[cp] = globidx; 7249 mptmp[cp] = PETSC_FALSE; 7250 cp++; 7251 } else { /* A_diag * P_diag and A_diag * P_off */ 7252 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7253 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7254 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7255 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7256 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7257 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7258 mp[cp]->product->api_user = 
product->api_user; 7259 PetscCall(MatProductSetFromOptions(mp[cp])); 7260 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7261 rmapt[cp] = 1; 7262 cmapt[cp] = 1; 7263 mptmp[cp] = PETSC_FALSE; 7264 cp++; 7265 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7266 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7267 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7268 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7269 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7270 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7271 mp[cp]->product->api_user = product->api_user; 7272 PetscCall(MatProductSetFromOptions(mp[cp])); 7273 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7274 rmapt[cp] = 1; 7275 cmapt[cp] = 2; 7276 cmapa[cp] = p->garray; 7277 mptmp[cp] = PETSC_FALSE; 7278 cp++; 7279 } 7280 7281 /* A_off * P_other */ 7282 if (mmdata->P_oth) { 7283 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7284 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7285 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7286 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7287 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7288 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7289 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7290 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7291 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7292 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7293 mp[cp]->product->api_user = product->api_user; 7294 PetscCall(MatProductSetFromOptions(mp[cp])); 7295 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7296 rmapt[cp] = 1; 7297 cmapt[cp] = 2; 7298 cmapa[cp] = P_oth_idx; 7299 mptmp[cp] = PETSC_FALSE; 7300 cp++; 7301 } 7302 break; 7303 7304 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7305 /* A is product->B */ 7306 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7307 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7308 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7309 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7310 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7311 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7312 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7313 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7314 mp[cp]->product->api_user = product->api_user; 7315 PetscCall(MatProductSetFromOptions(mp[cp])); 7316 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7317 PetscCall(ISGetIndices(glob, &globidx)); 7318 rmapt[cp] = 2; 7319 rmapa[cp] = globidx; 7320 cmapt[cp] = 2; 7321 cmapa[cp] = globidx; 7322 mptmp[cp] = PETSC_FALSE; 7323 cp++; 7324 } else { 7325 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7326 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7327 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7328 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7329 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7330 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7331 mp[cp]->product->api_user = product->api_user; 7332 PetscCall(MatProductSetFromOptions(mp[cp])); 7333 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7334 PetscCall(ISGetIndices(glob, &globidx)); 
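/* P_diag^T * A_loc: its rows are the locally owned columns of P, i.e. consecutive
         with the base row offset of C (type-1 row map), while its columns are renumbered
         through the merged local column map globidx (type-2). The P_off^T * A_loc product
         created next maps its rows through p->garray instead */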
7335 rmapt[cp] = 1; 7336 cmapt[cp] = 2; 7337 cmapa[cp] = globidx; 7338 mptmp[cp] = PETSC_FALSE; 7339 cp++; 7340 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7341 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7342 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7343 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7344 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7345 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7346 mp[cp]->product->api_user = product->api_user; 7347 PetscCall(MatProductSetFromOptions(mp[cp])); 7348 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7349 rmapt[cp] = 2; 7350 rmapa[cp] = p->garray; 7351 cmapt[cp] = 2; 7352 cmapa[cp] = globidx; 7353 mptmp[cp] = PETSC_FALSE; 7354 cp++; 7355 } 7356 break; 7357 case MATPRODUCT_PtAP: 7358 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7359 /* P is product->B */ 7360 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7361 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7362 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7363 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7364 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7365 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7366 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7367 mp[cp]->product->api_user = product->api_user; 7368 PetscCall(MatProductSetFromOptions(mp[cp])); 7369 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7370 PetscCall(ISGetIndices(glob, &globidx)); 7371 rmapt[cp] = 2; 7372 rmapa[cp] = globidx; 7373 cmapt[cp] = 2; 7374 cmapa[cp] = globidx; 7375 mptmp[cp] = PETSC_FALSE; 7376 cp++; 7377 if (mmdata->P_oth) { 7378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7379 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7380 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7381 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7382 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7383 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7384 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7385 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7386 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7387 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7388 mp[cp]->product->api_user = product->api_user; 7389 PetscCall(MatProductSetFromOptions(mp[cp])); 7390 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7391 mptmp[cp] = PETSC_TRUE; 7392 cp++; 7393 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7394 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7395 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7396 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7397 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7398 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7399 mp[cp]->product->api_user = product->api_user; 7400 PetscCall(MatProductSetFromOptions(mp[cp])); 7401 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7402 rmapt[cp] = 2; 7403 rmapa[cp] = globidx; 7404 cmapt[cp] = 2; 7405 cmapa[cp] = P_oth_idx; 7406 mptmp[cp] = PETSC_FALSE; 7407 cp++; 7408 } 7409 break; 7410 default: 7411 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]); 7412 } 7413 /* sanity check */ 7414 if (size > 1) 7415 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7416 7417 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7418 for (i = 0; i < cp; i++) { 7419 mmdata->mp[i] = mp[i]; 7420 mmdata->mptmp[i] = mptmp[i]; 7421 } 7422 mmdata->cp = cp; 7423 C->product->data = mmdata; 7424 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7425 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7426 7427 /* memory type */ 7428 mmdata->mtype = PETSC_MEMTYPE_HOST; 7429 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7430 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7431 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7432 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7433 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7434 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7435 7436 /* prepare coo coordinates for values insertion */ 7437 7438 /* count total nonzeros of those intermediate seqaij Mats 7439 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7440 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7441 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7442 */ 7443 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7444 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7445 if (mptmp[cp]) continue; 7446 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7447 const PetscInt *rmap = rmapa[cp]; 7448 const PetscInt mr = mp[cp]->rmap->n; 7449 const PetscInt rs = C->rmap->rstart; 7450 const PetscInt re = C->rmap->rend; 7451 const PetscInt *ii = mm->i; 7452 for (i = 0; i < mr; i++) { 7453 const PetscInt gr = rmap[i]; 7454 const PetscInt nz = ii[i + 1] - ii[i]; 7455 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7456 else ncoo_oown += nz; /* this row is local */ 7457 } 7458 } else ncoo_d += mm->nz; 7459 } 7460 7461 /* 7462 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7463 7464 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7465 7466 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7467 7468 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7469 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7470 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7471 7472 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7473 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
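     For example, with cp = 2 and both mp[0] and mp[1] contributing off-process entries,
     off[0] points to a single buffer of length ncoo_o laid out as
     [ entries of mp[0] | entries of mp[1] ], with off[1] and off[2] marking the segment
     boundaries (so off[2] - off[0] = ncoo_o); own[] follows the same layout for the
     locally inserted entries of the same matrices.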
7474 */ 7475 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7476 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7477 7478 /* gather (i,j) of nonzeros inserted by remote procs */ 7479 if (hasoffproc) { 7480 PetscSF msf; 7481 PetscInt ncoo2, *coo_i2, *coo_j2; 7482 7483 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7484 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7485 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7486 7487 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7488 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7489 PetscInt *idxoff = mmdata->off[cp]; 7490 PetscInt *idxown = mmdata->own[cp]; 7491 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7492 const PetscInt *rmap = rmapa[cp]; 7493 const PetscInt *cmap = cmapa[cp]; 7494 const PetscInt *ii = mm->i; 7495 PetscInt *coi = coo_i + ncoo_o; 7496 PetscInt *coj = coo_j + ncoo_o; 7497 const PetscInt mr = mp[cp]->rmap->n; 7498 const PetscInt rs = C->rmap->rstart; 7499 const PetscInt re = C->rmap->rend; 7500 const PetscInt cs = C->cmap->rstart; 7501 for (i = 0; i < mr; i++) { 7502 const PetscInt *jj = mm->j + ii[i]; 7503 const PetscInt gr = rmap[i]; 7504 const PetscInt nz = ii[i + 1] - ii[i]; 7505 if (gr < rs || gr >= re) { /* this is an offproc row */ 7506 for (j = ii[i]; j < ii[i + 1]; j++) { 7507 *coi++ = gr; 7508 *idxoff++ = j; 7509 } 7510 if (!cmapt[cp]) { /* already global */ 7511 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7512 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7513 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7514 } else { /* offdiag */ 7515 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7516 } 7517 ncoo_o += nz; 7518 } else { /* this is a local row */ 7519 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7520 } 7521 } 7522 } 7523 mmdata->off[cp + 1] = idxoff; 7524 mmdata->own[cp + 1] = idxown; 7525 } 7526 7527 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7528 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7529 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7530 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7531 ncoo = ncoo_d + ncoo_oown + ncoo2; 7532 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7533 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7534 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7535 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7536 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7537 PetscCall(PetscFree2(coo_i, coo_j)); 7538 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7539 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7540 coo_i = coo_i2; 7541 coo_j = coo_j2; 7542 } else { /* no offproc values insertion */ 7543 ncoo = ncoo_d; 7544 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7545 7546 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7547 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7548 PetscCall(PetscSFSetUp(mmdata->sf)); 7549 } 7550 mmdata->hasoffproc = hasoffproc; 7551 7552 /* gather (i,j) of nonzeros 
inserted locally */ 7553 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7554 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7555 PetscInt *coi = coo_i + ncoo_d; 7556 PetscInt *coj = coo_j + ncoo_d; 7557 const PetscInt *jj = mm->j; 7558 const PetscInt *ii = mm->i; 7559 const PetscInt *cmap = cmapa[cp]; 7560 const PetscInt *rmap = rmapa[cp]; 7561 const PetscInt mr = mp[cp]->rmap->n; 7562 const PetscInt rs = C->rmap->rstart; 7563 const PetscInt re = C->rmap->rend; 7564 const PetscInt cs = C->cmap->rstart; 7565 7566 if (mptmp[cp]) continue; 7567 if (rmapt[cp] == 1) { /* consecutive rows */ 7568 /* fill coo_i */ 7569 for (i = 0; i < mr; i++) { 7570 const PetscInt gr = i + rs; 7571 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7572 } 7573 /* fill coo_j */ 7574 if (!cmapt[cp]) { /* type-0, already global */ 7575 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7576 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7577 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7578 } else { /* type-2, local to global for sparse columns */ 7579 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7580 } 7581 ncoo_d += mm->nz; 7582 } else if (rmapt[cp] == 2) { /* sparse rows */ 7583 for (i = 0; i < mr; i++) { 7584 const PetscInt *jj = mm->j + ii[i]; 7585 const PetscInt gr = rmap[i]; 7586 const PetscInt nz = ii[i + 1] - ii[i]; 7587 if (gr >= rs && gr < re) { /* local rows */ 7588 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7589 if (!cmapt[cp]) { /* type-0, already global */ 7590 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7591 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7592 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7593 } else { /* type-2, local to global for sparse columns */ 7594 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7595 } 7596 ncoo_d += nz; 7597 } 7598 } 7599 } 7600 } 7601 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7602 PetscCall(ISDestroy(&glob)); 7603 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7604 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7605 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7606 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7607 7608 /* preallocate with COO data */ 7609 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7610 PetscCall(PetscFree2(coo_i, coo_j)); 7611 PetscFunctionReturn(PETSC_SUCCESS); 7612 } 7613 7614 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7615 { 7616 Mat_Product *product = mat->product; 7617 #if defined(PETSC_HAVE_DEVICE) 7618 PetscBool match = PETSC_FALSE; 7619 PetscBool usecpu = PETSC_FALSE; 7620 #else 7621 PetscBool match = PETSC_TRUE; 7622 #endif 7623 7624 PetscFunctionBegin; 7625 MatCheckProduct(mat, 1); 7626 #if defined(PETSC_HAVE_DEVICE) 7627 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7628 if (match) { /* we can always fallback to the CPU if requested */ 7629 switch (product->type) { 7630 case MATPRODUCT_AB: 7631 if (product->api_user) { 7632 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7633 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7634 PetscOptionsEnd(); 7635 } else { 7636 
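/* same CPU switch exposed under the MatProduct prefix; if
           -mat_product_algorithm_backend_cpu is set, match is cleared below and the
           routine falls back to MatProductSetFromOptions_MPIAIJ() at the end */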
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7637 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7638 PetscOptionsEnd(); 7639 } 7640 break; 7641 case MATPRODUCT_AtB: 7642 if (product->api_user) { 7643 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7644 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7645 PetscOptionsEnd(); 7646 } else { 7647 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7648 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7649 PetscOptionsEnd(); 7650 } 7651 break; 7652 case MATPRODUCT_PtAP: 7653 if (product->api_user) { 7654 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7655 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7656 PetscOptionsEnd(); 7657 } else { 7658 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7659 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7660 PetscOptionsEnd(); 7661 } 7662 break; 7663 default: 7664 break; 7665 } 7666 match = (PetscBool)!usecpu; 7667 } 7668 #endif 7669 if (match) { 7670 switch (product->type) { 7671 case MATPRODUCT_AB: 7672 case MATPRODUCT_AtB: 7673 case MATPRODUCT_PtAP: 7674 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7675 break; 7676 default: 7677 break; 7678 } 7679 } 7680 /* fallback to MPIAIJ ops */ 7681 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7682 PetscFunctionReturn(PETSC_SUCCESS); 7683 } 7684 7685 /* 7686 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7687 7688 n - the number of block indices in cc[] 7689 cc - the block indices (must be large enough to contain the indices) 7690 */ 7691 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7692 { 7693 PetscInt cnt = -1, nidx, j; 7694 const PetscInt *idx; 7695 7696 PetscFunctionBegin; 7697 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7698 if (nidx) { 7699 cnt = 0; 7700 cc[cnt] = idx[0] / bs; 7701 for (j = 1; j < nidx; j++) { 7702 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7703 } 7704 } 7705 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7706 *n = cnt + 1; 7707 PetscFunctionReturn(PETSC_SUCCESS); 7708 } 7709 7710 /* 7711 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7712 7713 ncollapsed - the number of block indices 7714 collapsed - the block indices (must be large enough to contain the indices) 7715 */ 7716 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7717 { 7718 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7719 7720 PetscFunctionBegin; 7721 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7722 for (i = start + 1; i < start + bs; i++) { 7723 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
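/* merge the sorted block-column list accumulated so far (cprev) with the list of the
       current row (ccur), then swap buffers so the merged result becomes cprev for the
       next pass */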
7724 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7725 cprevtmp = cprev; 7726 cprev = merged; 7727 merged = cprevtmp; 7728 } 7729 *ncollapsed = nprev; 7730 if (collapsed) *collapsed = cprev; 7731 PetscFunctionReturn(PETSC_SUCCESS); 7732 } 7733 7734 /* 7735 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7736 */ 7737 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7738 { 7739 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7740 Mat tGmat; 7741 MPI_Comm comm; 7742 const PetscScalar *vals; 7743 const PetscInt *idx; 7744 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7745 MatScalar *AA; // this is checked in graph 7746 PetscBool isseqaij; 7747 Mat a, b, c; 7748 MatType jtype; 7749 7750 PetscFunctionBegin; 7751 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7752 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7753 PetscCall(MatGetType(Gmat, &jtype)); 7754 PetscCall(MatCreate(comm, &tGmat)); 7755 PetscCall(MatSetType(tGmat, jtype)); 7756 7757 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7758 Also, if the matrix is symmetric, can we skip this 7759 operation? It can be very expensive on large matrices. */ 7760 7761 // global sizes 7762 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7763 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7764 nloc = Iend - Istart; 7765 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7766 if (isseqaij) { 7767 a = Gmat; 7768 b = NULL; 7769 } else { 7770 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7771 a = d->A; 7772 b = d->B; 7773 garray = d->garray; 7774 } 7775 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7776 for (PetscInt row = 0; row < nloc; row++) { 7777 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7778 d_nnz[row] = ncols; 7779 if (ncols > maxcols) maxcols = ncols; 7780 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7781 } 7782 if (b) { 7783 for (PetscInt row = 0; row < nloc; row++) { 7784 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7785 o_nnz[row] = ncols; 7786 if (ncols > maxcols) maxcols = ncols; 7787 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7788 } 7789 } 7790 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7791 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7792 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7793 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7794 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7795 PetscCall(PetscFree2(d_nnz, o_nnz)); 7796 // 7797 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7798 nnz0 = nnz1 = 0; 7799 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7800 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7801 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7802 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7803 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7804 if (PetscRealPart(sv) > vfilter) { 7805 nnz1++; 7806 PetscInt cid = idx[jj] + Istart; //diag 7807 if (c != a) cid = garray[idx[jj]]; 7808 AA[ncol_row] = vals[jj]; 7809 AJ[ncol_row] = cid; 7810 ncol_row++; 7811 } 7812 } 7813 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7814 PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7815 } 7816 } 7817 PetscCall(PetscFree2(AA, AJ)); 7818 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 
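/* entries with |PetscRealPart(a_ij)| <= vfilter were dropped in the loop above; e.g. with
     vfilter = 0.1 a row with values (0.05, -0.3, 0.08, 1.0) keeps only -0.3 and 1.0 */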
7819 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7820 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7821 7822 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7823 7824 *filteredG = tGmat; 7825 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7826 PetscFunctionReturn(PETSC_SUCCESS); 7827 } 7828 7829 /* 7830 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7831 7832 Input Parameter: 7833 . Amat - matrix 7834 - symmetrize - make the result symmetric 7835 + scale - scale with diagonal 7836 7837 Output Parameter: 7838 . a_Gmat - output scalar graph >= 0 7839 7840 */ 7841 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7842 { 7843 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7844 MPI_Comm comm; 7845 Mat Gmat; 7846 PetscBool ismpiaij, isseqaij; 7847 Mat a, b, c; 7848 MatType jtype; 7849 7850 PetscFunctionBegin; 7851 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7852 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7853 PetscCall(MatGetSize(Amat, &MM, &NN)); 7854 PetscCall(MatGetBlockSize(Amat, &bs)); 7855 nloc = (Iend - Istart) / bs; 7856 7857 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7858 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7859 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7860 7861 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7862 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7863 implementation */ 7864 if (bs > 1) { 7865 PetscCall(MatGetType(Amat, &jtype)); 7866 PetscCall(MatCreate(comm, &Gmat)); 7867 PetscCall(MatSetType(Gmat, jtype)); 7868 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7869 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7870 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7871 PetscInt *d_nnz, *o_nnz; 7872 MatScalar *aa, val, *AA; 7873 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7874 if (isseqaij) { 7875 a = Amat; 7876 b = NULL; 7877 } else { 7878 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7879 a = d->A; 7880 b = d->B; 7881 } 7882 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7883 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7884 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7885 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7886 const PetscInt *cols1, *cols2; 7887 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7888 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7889 nnz[brow / bs] = nc2 / bs; 7890 if (nc2 % bs) ok = 0; 7891 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7892 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7893 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7894 if (nc1 != nc2) ok = 0; 7895 else { 7896 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7897 if (cols1[jj] != cols2[jj]) ok = 0; 7898 if (cols1[jj] % bs != jj % bs) ok = 0; 7899 } 7900 } 7901 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7902 } 7903 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7904 if (!ok) { 7905 PetscCall(PetscFree2(d_nnz, o_nnz)); 7906 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7907 goto old_bs; 7908 } 7909 } 7910 } 7911 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7912 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7913 PetscCall(PetscFree2(d_nnz, o_nnz)); 7914 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7915 // diag 7916 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7917 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7918 ai = aseq->i; 7919 n = ai[brow + 1] - ai[brow]; 7920 aj = aseq->j + ai[brow]; 7921 for (int k = 0; k < n; k += bs) { // block columns 7922 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7923 val = 0; 7924 for (int ii = 0; ii < bs; ii++) { // rows in block 7925 aa = aseq->a + ai[brow + ii] + k; 7926 for (int jj = 0; jj < bs; jj++) { // columns in block 7927 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7928 } 7929 } 7930 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7931 AA[k / bs] = val; 7932 } 7933 grow = Istart / bs + brow / bs; 7934 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7935 } 7936 // off-diag 7937 if (ismpiaij) { 7938 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7939 const PetscScalar *vals; 7940 const PetscInt *cols, *garray = aij->garray; 7941 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7942 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7943 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7944 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7945 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7946 AA[k / bs] = 0; 7947 AJ[cidx] = garray[cols[k]] / bs; 7948 } 7949 nc = ncols / bs; 7950 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7951 for (int ii = 0; ii < bs; ii++) { // rows in block 7952 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7953 for (int k = 0; k < ncols; k += bs) { 7954 for (int jj = 0; jj < bs; jj++) { // cols in block 7955 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7956 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7957 } 7958 } 7959 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7960 } 7961 grow = Istart / bs + brow / bs; 7962 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7963 } 7964 } 7965 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7966 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7967 PetscCall(PetscFree2(AA, AJ)); 7968 } else { 7969 const PetscScalar *vals; 7970 const PetscInt *idx; 7971 PetscInt *d_nnz, 
*o_nnz, *w0, *w1, *w2; 7972 old_bs: 7973 /* 7974 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7975 */ 7976 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7977 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7978 if (isseqaij) { 7979 PetscInt max_d_nnz; 7980 /* 7981 Determine exact preallocation count for (sequential) scalar matrix 7982 */ 7983 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7984 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7985 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7986 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7987 PetscCall(PetscFree3(w0, w1, w2)); 7988 } else if (ismpiaij) { 7989 Mat Daij, Oaij; 7990 const PetscInt *garray; 7991 PetscInt max_d_nnz; 7992 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7993 /* 7994 Determine exact preallocation count for diagonal block portion of scalar matrix 7995 */ 7996 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7997 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7998 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7999 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 8000 PetscCall(PetscFree3(w0, w1, w2)); 8001 /* 8002 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 8003 */ 8004 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 8005 o_nnz[jj] = 0; 8006 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 8007 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8008 o_nnz[jj] += ncols; 8009 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8010 } 8011 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 8012 } 8013 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 8014 /* get scalar copy (norms) of matrix */ 8015 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8016 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8017 PetscCall(PetscFree2(d_nnz, o_nnz)); 8018 for (Ii = Istart; Ii < Iend; Ii++) { 8019 PetscInt dest_row = Ii / bs; 8020 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8021 for (jj = 0; jj < ncols; jj++) { 8022 PetscInt dest_col = idx[jj] / bs; 8023 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8024 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8025 } 8026 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8027 } 8028 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8029 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8030 } 8031 } else { 8032 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8033 else { 8034 Gmat = Amat; 8035 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8036 } 8037 if (isseqaij) { 8038 a = Gmat; 8039 b = NULL; 8040 } else { 8041 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8042 a = d->A; 8043 b = d->B; 8044 } 8045 if (filter >= 0 || scale) { 8046 /* take absolute value of each entry */ 8047 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8048 MatInfo info; 8049 PetscScalar *avals; 8050 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8051 PetscCall(MatSeqAIJGetArray(c, &avals)); 8052 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8053 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8054 } 8055 } 8056 } 8057 if 
(symmetrize) { 8058 PetscBool isset, issym; 8059 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8060 if (!isset || !issym) { 8061 Mat matTrans; 8062 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8063 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8064 PetscCall(MatDestroy(&matTrans)); 8065 } 8066 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8067 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8068 if (scale) { 8069 /* scale c for all diagonal values = 1 or -1 */ 8070 Vec diag; 8071 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8072 PetscCall(MatGetDiagonal(Gmat, diag)); 8073 PetscCall(VecReciprocal(diag)); 8074 PetscCall(VecSqrtAbs(diag)); 8075 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8076 PetscCall(VecDestroy(&diag)); 8077 } 8078 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8079 8080 if (filter >= 0) { 8081 Mat Fmat = NULL; /* some silly compiler needs this */ 8082 8083 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8084 PetscCall(MatDestroy(&Gmat)); 8085 Gmat = Fmat; 8086 } 8087 *a_Gmat = Gmat; 8088 PetscFunctionReturn(PETSC_SUCCESS); 8089 } 8090 8091 /* 8092 Special version for direct calls from Fortran 8093 */ 8094 #include <petsc/private/fortranimpl.h> 8095 8096 /* Change these macros so can be used in void function */ 8097 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8098 #undef PetscCall 8099 #define PetscCall(...) \ 8100 do { \ 8101 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8102 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8103 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8104 return; \ 8105 } \ 8106 } while (0) 8107 8108 #undef SETERRQ 8109 #define SETERRQ(comm, ierr, ...) \ 8110 do { \ 8111 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8112 return; \ 8113 } while (0) 8114 8115 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8116 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8117 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8118 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8119 #else 8120 #endif 8121 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8122 { 8123 Mat mat = *mmat; 8124 PetscInt m = *mm, n = *mn; 8125 InsertMode addv = *maddv; 8126 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8127 PetscScalar value; 8128 8129 MatCheckPreallocated(mat, 1); 8130 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8131 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8132 { 8133 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8134 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8135 PetscBool roworiented = aij->roworiented; 8136 8137 /* Some Variables required in the macro */ 8138 Mat A = aij->A; 8139 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8140 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8141 MatScalar *aa; 8142 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 8143 Mat B = aij->B; 8144 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8145 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8146 MatScalar *ba; 8147 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8148 * cannot use "#if defined" inside a macro. */ 8149 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8150 8151 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8152 PetscInt nonew = a->nonew; 8153 MatScalar *ap1, *ap2; 8154 8155 PetscFunctionBegin; 8156 PetscCall(MatSeqAIJGetArray(A, &aa)); 8157 PetscCall(MatSeqAIJGetArray(B, &ba)); 8158 for (i = 0; i < m; i++) { 8159 if (im[i] < 0) continue; 8160 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8161 if (im[i] >= rstart && im[i] < rend) { 8162 row = im[i] - rstart; 8163 lastcol1 = -1; 8164 rp1 = aj + ai[row]; 8165 ap1 = aa + ai[row]; 8166 rmax1 = aimax[row]; 8167 nrow1 = ailen[row]; 8168 low1 = 0; 8169 high1 = nrow1; 8170 lastcol2 = -1; 8171 rp2 = bj + bi[row]; 8172 ap2 = ba + bi[row]; 8173 rmax2 = bimax[row]; 8174 nrow2 = bilen[row]; 8175 low2 = 0; 8176 high2 = nrow2; 8177 8178 for (j = 0; j < n; j++) { 8179 if (roworiented) value = v[i * n + j]; 8180 else value = v[i + j * m]; 8181 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8182 if (in[j] >= cstart && in[j] < cend) { 8183 col = in[j] - cstart; 8184 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8185 } else if (in[j] < 0) continue; 8186 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8187 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8188 } else { 8189 if (mat->was_assembled) { 8190 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8191 #if defined(PETSC_USE_CTABLE) 8192 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8193 col--; 8194 #else 8195 col = aij->colmap[in[j]] - 1; 8196 #endif 8197 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8198 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8199 col = in[j]; 8200 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8201 B = aij->B; 8202 b = (Mat_SeqAIJ *)B->data; 8203 bimax = b->imax; 8204 bi = b->i; 8205 bilen = b->ilen; 8206 bj = b->j; 8207 rp2 = bj + bi[row]; 8208 ap2 = ba + bi[row]; 8209 rmax2 = bimax[row]; 8210 nrow2 = bilen[row]; 8211 low2 = 0; 8212 high2 = nrow2; 8213 bm = aij->B->rmap->n; 8214 ba = b->a; 8215 inserted = PETSC_FALSE; 8216 } 8217 } else col = in[j]; 8218 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8219 } 8220 } 8221 } else if (!aij->donotstash) { 8222 if (roworiented) { 8223 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8224 } else { 8225 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8226 } 8227 } 8228 } 8229 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8230 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8231 } 8232 PetscFunctionReturnVoid(); 8233 } 8234 8235 /* Undefining these here since they were redefined from their original definition above! 
No 8236 * other PETSc functions should be defined past this point, as it is impossible to recover the 8237 * original definitions */ 8238 #undef PetscCall 8239 #undef SETERRQ 8240
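
/*
  A minimal standalone sketch (illustrative only, not part of this translation unit) of how
  the backend product path above is typically exercised: the matrices are created as AIJ and
  switched to a device type on the command line (for example -mat_type aijcusparse), after
  which MatPtAP() can dispatch to MatProductSymbolic_MPIAIJBACKEND() and
  MatProductNumeric_MPIAIJBACKEND(). Matrix sizes and values are arbitrary.

    #include <petscmat.h>

    int main(int argc, char **argv)
    {
      Mat      A, P, C;
      PetscInt i, rstart, rend, N = 16;

      PetscFunctionBeginUser;
      PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
      PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
      PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
      PetscCall(MatSetType(A, MATAIJ));
      PetscCall(MatSetFromOptions(A)); // e.g. -mat_type aijcusparse selects the device backend
      PetscCall(MatSetUp(A));
      PetscCall(MatCreate(PETSC_COMM_WORLD, &P));
      PetscCall(MatSetSizes(P, PETSC_DECIDE, PETSC_DECIDE, N, N));
      PetscCall(MatSetType(P, MATAIJ));
      PetscCall(MatSetFromOptions(P));
      PetscCall(MatSetUp(P));
      PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
      for (i = rstart; i < rend; i++) {
        PetscCall(MatSetValue(A, i, i, 2.0, INSERT_VALUES));
        if (i + 1 < N) PetscCall(MatSetValue(A, i, i + 1, -1.0, INSERT_VALUES));
        PetscCall(MatSetValue(P, i, i, 1.0, INSERT_VALUES));
      }
      PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));
      PetscCall(MatPtAP(A, P, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
      PetscCall(MatDestroy(&C));
      PetscCall(MatDestroy(&P));
      PetscCall(MatDestroy(&A));
      PetscCall(PetscFinalize());
      return 0;
    }
*/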