#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
  MatDestroy_MPIAIJ - Destroys an MPIAIJ matrix.

  Frees the sequential diagonal (A) and off-diagonal (B) blocks, the global-to-local
  column map, the communication objects (lvec/Mvctx), the private data structure, and
  then unregisters every composed function/object so the base Mat can be reused.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* Remove the type name and every function composed on this type so a subsequent
     MatSetType() on the same Mat starts from a clean slate */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is reset twice (also above) -- harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
  MatGetRowIJ_MPIAIJ - Returns compressed row storage (i,j) arrays for the whole
  local part of the parallel matrix by first merging A and B into a single
  sequential matrix; that matrix is stashed on the object so MatRestoreRowIJ_MPIAIJ()
  can find it again (its reference is kept alive by the compose).
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  /* keep B alive (composed reference) until the matching restore call */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatRestoreRowIJ_MPIAIJ - Companion of MatGetRowIJ_MPIAIJ(); retrieves the stashed
  local matrix, restores its (i,j) arrays, and drops the stashed reference.
*/
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
  MatBindToCPU_MPIAIJ - Binds (or unbinds) the matrix and all of its sub-objects
  to the CPU, so subsequent operations do not run on a device.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  /* the flag is only meaningful when a device backend is configured */
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetBlockSizes_MPIAIJ - Propagates the row/column block sizes to the diagonal
  block; the off-diagonal block always gets column block size 1 (its columns are
  the compressed ghost columns, which carry no block structure).
*/
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindNonzeroRows_MPIAIJ - Builds an index set (global row numbers) of the local
  rows that contain at least one stored, numerically nonzero entry in either the
  diagonal (A) or off-diagonal (B) block.

  Output: *keptrows is NULL when no rank has an all-zero row (collective decision
  via the MPIU_Allreduce below), otherwise an IS of the kept rows.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count the local rows that are entirely (structurally or numerically) zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero in the diagonal block */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* row has a nonzero in the off-diagonal block */
    }
    cnt++; /* all stored entries were numerically zero */
  ok1:;
  }
  /* count zero rows globally; if none anywhere, every rank returns NULL */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the rows that DO have a nonzero */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalSet_MPIAIJ - Sets/adds the vector D onto the diagonal. When the row and
  column layouts are congruent the whole diagonal lives in the local block A, so the
  operation can be forwarded there directly; otherwise fall back to the generic path.
*/
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindZeroDiagonals_MPIAIJ - Returns an IS of the global indices of local rows
  whose diagonal entry is missing or zero (the diagonal lives entirely in aij->A).
*/
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  /* find zero diagonals in the local (diagonal) block, then shift to global numbering */
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (norms, sums of
  real/imaginary parts, or means) over the whole parallel matrix.

  Each rank accumulates its contribution into a length-N work array (diagonal-block
  entries via the column ownership offset, off-diagonal entries via garray), then a
  global MPIU_Allreduce (MAX for the infinity norm, SUM otherwise) combines them.
  reductions[] must have length N (global number of columns) on every rank.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore the arrays without using them: presumably forces any device values
     to be synced to the host before a_aij->a/b_aij->a are read below -- TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* the loops above accumulated squares; take the square root for the 2-norm */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* divide by the global number of rows to turn the sums into means */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindOffBlockDiagonalEntries_MPIAIJ - Returns the (global) indices of local rows
  that have entries outside the block diagonal: the union of such rows of the
  diagonal block with all rows that have any off-diagonal (B) entry, sorted with
  duplicates removed.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both row lists, then sort and deduplicate */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  /* shift local row numbers to global numbering */
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* stored values are shifted by +1 so that 0 can mean "not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues_SeqAIJ_A_Private - Inserts/adds one value into the diagonal block.
  Uses the rp1/ap1/nrow1/low1/high1/lastcol1 locals of the caller (MatSetValues_MPIAIJ):
  binary-search-like narrowing, in-place update if the column exists, otherwise
  reallocation (unless nonew forbids it) and an insertion shift.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure if LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

/*
  MatSetValues_SeqAIJ_B_Private - Same as the A variant above but for the
  off-diagonal block (rp2/ap2/nrow2/... locals). Note: unlike the A variant, the
  zero-value skip here does not test row != col (off-diagonal block never holds
  the diagonal entry).
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

/*
  MatSetValuesRow_MPIAIJ - Replaces ALL stored values of one (global) row with the
  values in v, which are given in global column order. The row is split into the
  part of B left of the diagonal block, the A part, and the part of B right of the
  diagonal block; v must follow that same ordering.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* first B column past the diagonal block */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues_MPIAIJ - Inserts/adds a logically dense block of values. Locally
  owned rows go straight into the A/B blocks via the macros above; off-process rows
  are stashed for communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ  *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar  value       = 0.0;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool    roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* locals shared with the MatSetValues_SeqAIJ_{A,B}_Private macros */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the macro state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero but B is locked: either skip with a message or error */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write positions into aj/bj */
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column ids */
        dnz++;
      } else { /* off-diagonal entries keep their global column ids (pre-assembly format) */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetValues_MPIAIJ - Retrieves a logically dense block of values; only rows owned
  by this process may be requested (off-process queries raise PETSC_ERR_SUP).
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt
    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column owned locally: value (if any) lives in the diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* ghost column: translate the global id through the colmap into B's numbering */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in B on this rank -> the entry is structurally zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAssemblyBegin_MPIAIJ - Starts communication of the stashed off-process entries.
  A no-op when stashing is disabled or the user promised no off-process entries.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 759 { 760 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 761 PetscMPIInt n; 762 PetscInt i, j, rstart, ncols, flg; 763 PetscInt *row, *col; 764 PetscBool other_disassembled; 765 PetscScalar *val; 766 767 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 768 769 PetscFunctionBegin; 770 if (!aij->donotstash && !mat->nooffprocentries) { 771 while (1) { 772 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 773 if (!flg) break; 774 775 for (i = 0; i < n;) { 776 /* Now identify the consecutive vals belonging to the same row */ 777 for (j = i, rstart = row[j]; j < n; j++) { 778 if (row[j] != rstart) break; 779 } 780 if (j < n) ncols = j - i; 781 else ncols = n - i; 782 /* Now assemble all these values with a single function call */ 783 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 784 i = j; 785 } 786 } 787 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 788 } 789 #if defined(PETSC_HAVE_DEVICE) 790 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 791 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 792 if (mat->boundtocpu) { 793 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 794 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 795 } 796 #endif 797 PetscCall(MatAssemblyBegin(aij->A, mode)); 798 PetscCall(MatAssemblyEnd(aij->A, mode)); 799 800 /* determine if any processor has disassembled, if so we must 801 also disassemble ourself, in order that we may reassemble. 
*/ 802 /* 803 if nonzero structure of submatrix B cannot change then we know that 804 no processor disassembled thus we can skip this stuff 805 */ 806 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 807 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 808 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 809 PetscCall(MatDisAssemble_MPIAIJ(mat)); 810 } 811 } 812 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 813 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 814 #if defined(PETSC_HAVE_DEVICE) 815 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 816 #endif 817 PetscCall(MatAssemblyBegin(aij->B, mode)); 818 PetscCall(MatAssemblyEnd(aij->B, mode)); 819 820 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 821 822 aij->rowvalues = NULL; 823 824 PetscCall(VecDestroy(&aij->diag)); 825 826 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 827 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 828 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 829 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 830 } 831 #if defined(PETSC_HAVE_DEVICE) 832 mat->offloadmask = PETSC_OFFLOAD_BOTH; 833 #endif 834 PetscFunctionReturn(PETSC_SUCCESS); 835 } 836 837 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 838 { 839 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 840 841 PetscFunctionBegin; 842 PetscCall(MatZeroEntries(l->A)); 843 PetscCall(MatZeroEntries(l->B)); 844 PetscFunctionReturn(PETSC_SUCCESS); 845 } 846 847 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], 
PetscScalar diag, Vec x, Vec b) 848 { 849 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 850 PetscObjectState sA, sB; 851 PetscInt *lrows; 852 PetscInt r, len; 853 PetscBool cong, lch, gch; 854 855 PetscFunctionBegin; 856 /* get locally owned rows */ 857 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 858 PetscCall(MatHasCongruentLayouts(A, &cong)); 859 /* fix right hand side if needed */ 860 if (x && b) { 861 const PetscScalar *xx; 862 PetscScalar *bb; 863 864 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 865 PetscCall(VecGetArrayRead(x, &xx)); 866 PetscCall(VecGetArray(b, &bb)); 867 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 868 PetscCall(VecRestoreArrayRead(x, &xx)); 869 PetscCall(VecRestoreArray(b, &bb)); 870 } 871 872 sA = mat->A->nonzerostate; 873 sB = mat->B->nonzerostate; 874 875 if (diag != 0.0 && cong) { 876 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 877 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 878 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 879 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 880 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 881 PetscInt nnwA, nnwB; 882 PetscBool nnzA, nnzB; 883 884 nnwA = aijA->nonew; 885 nnwB = aijB->nonew; 886 nnzA = aijA->keepnonzeropattern; 887 nnzB = aijB->keepnonzeropattern; 888 if (!nnzA) { 889 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 890 aijA->nonew = 0; 891 } 892 if (!nnzB) { 893 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 894 aijB->nonew = 0; 895 } 896 /* Must zero here before the next loop */ 897 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 898 PetscCall(MatZeroRows(mat->B, len, lrows, 
0.0, NULL, NULL)); 899 for (r = 0; r < len; ++r) { 900 const PetscInt row = lrows[r] + A->rmap->rstart; 901 if (row >= A->cmap->N) continue; 902 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 903 } 904 aijA->nonew = nnwA; 905 aijB->nonew = nnwB; 906 } else { 907 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 908 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 909 } 910 PetscCall(PetscFree(lrows)); 911 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 912 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 913 914 /* reduce nonzerostate */ 915 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 916 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 917 if (gch) A->nonzerostate++; 918 PetscFunctionReturn(PETSC_SUCCESS); 919 } 920 921 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 922 { 923 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 924 PetscMPIInt n = A->rmap->n; 925 PetscInt i, j, r, m, len = 0; 926 PetscInt *lrows, *owners = A->rmap->range; 927 PetscMPIInt p = 0; 928 PetscSFNode *rrows; 929 PetscSF sf; 930 const PetscScalar *xx; 931 PetscScalar *bb, *mask, *aij_a; 932 Vec xmask, lmask; 933 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 934 const PetscInt *aj, *ii, *ridx; 935 PetscScalar *aa; 936 937 PetscFunctionBegin; 938 /* Create SF where leaves are input rows and roots are owned rows */ 939 PetscCall(PetscMalloc1(n, &lrows)); 940 for (r = 0; r < n; ++r) lrows[r] = -1; 941 PetscCall(PetscMalloc1(N, &rrows)); 942 for (r = 0; r < N; ++r) { 943 const PetscInt idx = rows[r]; 944 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 945 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 946 
PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 947 } 948 rrows[r].rank = p; 949 rrows[r].index = rows[r] - owners[p]; 950 } 951 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 952 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 953 /* Collect flags for rows to be zeroed */ 954 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 955 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 956 PetscCall(PetscSFDestroy(&sf)); 957 /* Compress and put in row numbers */ 958 for (r = 0; r < n; ++r) 959 if (lrows[r] >= 0) lrows[len++] = r; 960 /* zero diagonal part of matrix */ 961 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 962 /* handle off diagonal part of matrix */ 963 PetscCall(MatCreateVecs(A, &xmask, NULL)); 964 PetscCall(VecDuplicate(l->lvec, &lmask)); 965 PetscCall(VecGetArray(xmask, &bb)); 966 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 967 PetscCall(VecRestoreArray(xmask, &bb)); 968 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 969 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecDestroy(&xmask)); 971 if (x && b) { /* this code is buggy when the row and column layout don't match */ 972 PetscBool cong; 973 974 PetscCall(MatHasCongruentLayouts(A, &cong)); 975 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 976 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 977 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 978 PetscCall(VecGetArrayRead(l->lvec, &xx)); 979 PetscCall(VecGetArray(b, &bb)); 980 } 981 PetscCall(VecGetArray(lmask, &mask)); 982 /* remove zeroed rows of off diagonal matrix */ 983 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 984 ii = aij->i; 985 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - 
ii[lrows[i]])); 986 /* loop over all elements of off process part of matrix zeroing removed columns*/ 987 if (aij->compressedrow.use) { 988 m = aij->compressedrow.nrows; 989 ii = aij->compressedrow.i; 990 ridx = aij->compressedrow.rindex; 991 for (i = 0; i < m; i++) { 992 n = ii[i + 1] - ii[i]; 993 aj = aij->j + ii[i]; 994 aa = aij_a + ii[i]; 995 996 for (j = 0; j < n; j++) { 997 if (PetscAbsScalar(mask[*aj])) { 998 if (b) bb[*ridx] -= *aa * xx[*aj]; 999 *aa = 0.0; 1000 } 1001 aa++; 1002 aj++; 1003 } 1004 ridx++; 1005 } 1006 } else { /* do not use compressed row format */ 1007 m = l->B->rmap->n; 1008 for (i = 0; i < m; i++) { 1009 n = ii[i + 1] - ii[i]; 1010 aj = aij->j + ii[i]; 1011 aa = aij_a + ii[i]; 1012 for (j = 0; j < n; j++) { 1013 if (PetscAbsScalar(mask[*aj])) { 1014 if (b) bb[i] -= *aa * xx[*aj]; 1015 *aa = 0.0; 1016 } 1017 aa++; 1018 aj++; 1019 } 1020 } 1021 } 1022 if (x && b) { 1023 PetscCall(VecRestoreArray(b, &bb)); 1024 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1025 } 1026 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1027 PetscCall(VecRestoreArray(lmask, &mask)); 1028 PetscCall(VecDestroy(&lmask)); 1029 PetscCall(PetscFree(lrows)); 1030 1031 /* only change matrix nonzero state if pattern was allowed to be changed */ 1032 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1033 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1034 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1035 } 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } 1038 1039 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1040 { 1041 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1042 PetscInt nt; 1043 VecScatter Mvctx = a->Mvctx; 1044 1045 PetscFunctionBegin; 1046 PetscCall(VecGetLocalSize(xx, &nt)); 1047 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1048 
PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1049 PetscUseTypeMethod(a->A, mult, xx, yy); 1050 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1051 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1058 1059 PetscFunctionBegin; 1060 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1061 PetscFunctionReturn(PETSC_SUCCESS); 1062 } 1063 1064 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1067 VecScatter Mvctx = a->Mvctx; 1068 1069 PetscFunctionBegin; 1070 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1071 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1072 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1073 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 1081 PetscFunctionBegin; 1082 /* do nondiagonal part */ 1083 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1084 /* do local part */ 1085 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1086 /* add partial results together */ 1087 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1088 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1093 { 1094 MPI_Comm comm; 1095 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1096 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1097 IS Me, Notme; 1098 PetscInt M, N, first, 
last, *notme, i; 1099 PetscBool lf; 1100 PetscMPIInt size; 1101 1102 PetscFunctionBegin; 1103 /* Easy test: symmetric diagonal block */ 1104 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1105 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1106 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1107 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1108 PetscCallMPI(MPI_Comm_size(comm, &size)); 1109 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1110 1111 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1112 PetscCall(MatGetSize(Amat, &M, &N)); 1113 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1114 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1115 for (i = 0; i < first; i++) notme[i] = i; 1116 for (i = last; i < M; i++) notme[i - last + first] = i; 1117 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1118 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1119 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1120 Aoff = Aoffs[0]; 1121 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1122 Boff = Boffs[0]; 1123 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1124 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1125 PetscCall(MatDestroyMatrices(1, &Boffs)); 1126 PetscCall(ISDestroy(&Me)); 1127 PetscCall(ISDestroy(&Notme)); 1128 PetscCall(PetscFree(notme)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131 1132 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1133 { 1134 PetscFunctionBegin; 1135 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1136 PetscFunctionReturn(PETSC_SUCCESS); 1137 } 1138 1139 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1140 { 1141 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1142 1143 PetscFunctionBegin; 1144 /* do nondiagonal part */ 1145 PetscCall((*a->B->ops->multtranspose)(a->B, 
xx, a->lvec)); 1146 /* do local part */ 1147 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1148 /* add partial results together */ 1149 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1150 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1151 PetscFunctionReturn(PETSC_SUCCESS); 1152 } 1153 1154 /* 1155 This only works correctly for square matrices where the subblock A->A is the 1156 diagonal block 1157 */ 1158 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1159 { 1160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1161 1162 PetscFunctionBegin; 1163 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1164 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1165 PetscCall(MatGetDiagonal(a->A, v)); 1166 PetscFunctionReturn(PETSC_SUCCESS); 1167 } 1168 1169 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1170 { 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1172 1173 PetscFunctionBegin; 1174 PetscCall(MatScale(a->A, aa)); 1175 PetscCall(MatScale(a->B, aa)); 1176 PetscFunctionReturn(PETSC_SUCCESS); 1177 } 1178 1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1180 { 1181 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1182 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1183 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1184 const PetscInt *garray = aij->garray; 1185 const PetscScalar *aa, *ba; 1186 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1187 PetscInt64 nz, hnz; 1188 PetscInt *rowlens; 1189 PetscInt *colidxs; 1190 PetscScalar *matvals; 1191 PetscMPIInt rank; 1192 1193 PetscFunctionBegin; 1194 PetscCall(PetscViewerSetUp(viewer)); 1195 1196 M = mat->rmap->N; 1197 N = mat->cmap->N; 1198 m = mat->rmap->n; 1199 rs = mat->rmap->rstart; 1200 cs = mat->cmap->rstart; 1201 nz = A->nz + 
B->nz; 1202 1203 /* write matrix header */ 1204 header[0] = MAT_FILE_CLASSID; 1205 header[1] = M; 1206 header[2] = N; 1207 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1208 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1209 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1210 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1211 1212 /* fill in and store row lengths */ 1213 PetscCall(PetscMalloc1(m, &rowlens)); 1214 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1215 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1216 PetscCall(PetscFree(rowlens)); 1217 1218 /* fill in and store column indices */ 1219 PetscCall(PetscMalloc1(nz, &colidxs)); 1220 for (cnt = 0, i = 0; i < m; i++) { 1221 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1222 if (garray[B->j[jb]] > cs) break; 1223 colidxs[cnt++] = garray[B->j[jb]]; 1224 } 1225 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1226 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1227 } 1228 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1229 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1230 PetscCall(PetscFree(colidxs)); 1231 1232 /* fill in and store nonzero values */ 1233 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1234 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1235 PetscCall(PetscMalloc1(nz, &matvals)); 1236 for (cnt = 0, i = 0; i < m; i++) { 1237 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1238 if (garray[B->j[jb]] > cs) break; 1239 matvals[cnt++] = ba[jb]; 1240 } 1241 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1242 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1243 } 1244 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 
1245 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1246 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1247 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1248 PetscCall(PetscFree(matvals)); 1249 1250 /* write block size option to the viewer's .info file */ 1251 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 #include <petscdraw.h> 1256 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1259 PetscMPIInt rank = aij->rank, size = aij->size; 1260 PetscBool isdraw, iascii, isbinary; 1261 PetscViewer sviewer; 1262 PetscViewerFormat format; 1263 1264 PetscFunctionBegin; 1265 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1266 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1267 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1268 if (iascii) { 1269 PetscCall(PetscViewerGetFormat(viewer, &format)); 1270 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1271 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1272 PetscCall(PetscMalloc1(size, &nz)); 1273 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1274 for (i = 0; i < (PetscInt)size; i++) { 1275 nmax = PetscMax(nmax, nz[i]); 1276 nmin = PetscMin(nmin, nz[i]); 1277 navg += nz[i]; 1278 } 1279 PetscCall(PetscFree(nz)); 1280 navg = navg / size; 1281 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 
PetscCall(PetscViewerGetFormat(viewer, &format)); 1285 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1286 MatInfo info; 1287 PetscInt *inodes = NULL; 1288 1289 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1290 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1291 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1292 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1293 if (!inodes) { 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1295 (double)info.memory)); 1296 } else { 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1298 (double)info.memory)); 1299 } 1300 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1301 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1302 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1303 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1304 PetscCall(PetscViewerFlush(viewer)); 1305 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1306 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1307 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1310 PetscInt inodecount, inodelimit, *inodes; 1311 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1312 if (inodes) { 1313 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" 
PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1314 } else { 1315 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } 1321 } else if (isbinary) { 1322 if (size == 1) { 1323 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1324 PetscCall(MatView(aij->A, viewer)); 1325 } else { 1326 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1327 } 1328 PetscFunctionReturn(PETSC_SUCCESS); 1329 } else if (iascii && size == 1) { 1330 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1331 PetscCall(MatView(aij->A, viewer)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } else if (isdraw) { 1334 PetscDraw draw; 1335 PetscBool isnull; 1336 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1337 PetscCall(PetscDrawIsNull(draw, &isnull)); 1338 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1339 } 1340 1341 { /* assemble the entire matrix onto first processor */ 1342 Mat A = NULL, Av; 1343 IS isrow, iscol; 1344 1345 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1346 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1347 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1348 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1349 /* The commented code uses MatCreateSubMatrices instead */ 1350 /* 1351 Mat *AA, A = NULL, Av; 1352 IS isrow,iscol; 1353 1354 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1355 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1356 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1357 if (rank == 0) { 1358 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1359 A = AA[0]; 1360 Av = AA[0]; 1361 } 1362 PetscCall(MatDestroySubMatrices(1,&AA)); 1363 */ 1364 PetscCall(ISDestroy(&iscol)); 1365 PetscCall(ISDestroy(&isrow)); 1366 /* 1367 Everyone has to call to draw the matrix since the graphics waits are 1368 synchronized across all processors that share the PetscDraw object 1369 */ 1370 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1371 if (rank == 0) { 1372 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1373 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1374 } 1375 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1376 PetscCall(PetscViewerFlush(viewer)); 1377 PetscCall(MatDestroy(&A)); 1378 } 1379 PetscFunctionReturn(PETSC_SUCCESS); 1380 } 1381 1382 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1383 { 1384 PetscBool iascii, isdraw, issocket, isbinary; 1385 1386 PetscFunctionBegin; 1387 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1388 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1389 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1390 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1391 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1396 { 1397 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1398 Vec bb1 = NULL; 1399 PetscBool hasop; 1400 1401 PetscFunctionBegin; 1402 if (flag == SOR_APPLY_UPPER) { 1403 
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1408 1409 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1410 if (flag & SOR_ZERO_INITIAL_GUESS) { 1411 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1412 its--; 1413 } 1414 1415 while (its--) { 1416 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1417 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1418 1419 /* update rhs: bb1 = bb - B*x */ 1420 PetscCall(VecScale(mat->lvec, -1.0)); 1421 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1422 1423 /* local sweep */ 1424 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1425 } 1426 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1427 if (flag & SOR_ZERO_INITIAL_GUESS) { 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1429 its--; 1430 } 1431 while (its--) { 1432 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1433 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1434 1435 /* update rhs: bb1 = bb - B*x */ 1436 PetscCall(VecScale(mat->lvec, -1.0)); 1437 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1438 1439 /* local sweep */ 1440 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1441 } 1442 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1443 if (flag & SOR_ZERO_INITIAL_GUESS) { 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1445 its--; 1446 } 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx, 
xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    /* bring ghost values of xx onto this process before forming the Eisenstat rhs */
    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily create and cache the diagonal of the matrix */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatPermute_MPIAIJ - Builds *B = P*A*Q^T for row permutation rowp and column permutation colp.

   Strategy: invert the two permutations with PetscSF reductions so every process learns
   the new global index of each of its rows (rdest[]), local columns (cdest[]) and ghost
   columns (gcdest[]); count diagonal/off-diagonal nonzeros per destination row for
   preallocation; then insert the permuted entries with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros per (destination) row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetGhosts_MPIAIJ - Returns the number of ghost (off-process) columns and, optionally,
   their global indices (the garray built during assembly).  The returned array is owned by
   the matrix; the caller must not free it.
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetInfo_MPIAIJ - Sums the statistics of the diagonal (A) and off-diagonal (B) blocks,
   then reduces across the communicator according to the requested MatInfoType.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatSetOption_MPIAIJ - Dispatches a MatOption: most options are forwarded to both the
   diagonal (A) and off-diagonal (B) sequential blocks; a few are recorded locally or
   deliberately ignored.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the parallel matrix, merging the
   diagonal (A) and off-diagonal (B) block entries into a single array sorted by global
   column.  Values/indices are copied into per-matrix scratch buffers (rowvalues /
   rowindices), so only one row may be "gotten" at a time (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* NULL out the work pointers the caller did not ask for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first, then all of A, then the rest of B */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRestoreRow_MPIAIJ - Releases the row obtained by MatGetRow_MPIAIJ (clears the active flag). */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatNorm_MPIAIJ - Computes Frobenius, 1- (max column) or infinity- (max row) norm of the
   parallel matrix by combining the local A and B blocks and reducing over the communicator.
   The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates the per-(global)-column absolute sums; note this is O(global columns) storage */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v  = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]]
+= PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of the parallel matrix.

   For MAT_INITIAL_MATRIX (or in-place), the nonzero counts of the transposed diagonal and
   off-diagonal blocks are computed first (off-diagonal counts are summed back to the column
   owners with a PetscSF reduction) so the result can be fully preallocated.  The diagonal
   block is then transposed locally; the off-diagonal entries are inserted column-wise with
   MatSetValues (1 column at a time, note the swapped row/column arguments).
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* one column of the transpose at a time: row i of B becomes column `row` of the result */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: fold B back into A */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr); either vector may be NULL.
   The scatter of rr's ghost values is started first so the left scaling of the off-diagonal
   block overlaps communication.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatEqual_MPIAIJ - Compares both local blocks of A and B, then takes the logical AND over
   the communicator so all ranks return the same answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatCopy_MPIAIJ - Copies the values of A into B.  The fast block-wise path is only taken
   for SAME_NONZERO_PATTERN with identical copy implementations; otherwise the generic
   MatCopy_Basic is used.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.

   Merges the (sorted) rows of X and Y in global-column order, counting each distinct
   column once (a two-pointer merge per row).
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                  /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.  Fast block-wise path for SAME_NONZERO_PATTERN;
   SUBSET falls through to the basic implementation; otherwise a new matrix with the merged
   nonzero pattern is preallocated and Y is replaced by it via MatHeaderMerge.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* MatConjugate_MPIAIJ - Conjugates all entries; a no-op for real scalars. */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatRealPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCall(MatRealPart(a->A)); 2147 PetscCall(MatRealPart(a->B)); 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2154 2155 PetscFunctionBegin; 2156 PetscCall(MatImaginaryPart(a->A)); 2157 PetscCall(MatImaginaryPart(a->B)); 2158 PetscFunctionReturn(PETSC_SUCCESS); 2159 } 2160 2161 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2162 { 2163 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2164 PetscInt i, *idxb = NULL, m = A->rmap->n; 2165 PetscScalar *va, *vv; 2166 Vec vB, vA; 2167 const PetscScalar *vb; 2168 2169 PetscFunctionBegin; 2170 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2171 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2172 2173 PetscCall(VecGetArrayWrite(vA, &va)); 2174 if (idx) { 2175 for (i = 0; i < m; i++) { 2176 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2177 } 2178 } 2179 2180 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2181 PetscCall(PetscMalloc1(m, &idxb)); 2182 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2183 2184 PetscCall(VecGetArrayWrite(v, &vv)); 2185 PetscCall(VecGetArrayRead(vB, &vb)); 2186 for (i = 0; i < m; i++) { 2187 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2188 vv[i] = vb[i]; 2189 if (idx) idx[i] = a->garray[idxb[i]]; 2190 } else { 2191 vv[i] = va[i]; 2192 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2193 } 2194 } 2195 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2196 PetscCall(VecRestoreArrayWrite(vA, &va)); 2197 PetscCall(VecRestoreArrayRead(vB, &vb)); 2198 PetscCall(PetscFree(idxb)); 2199 PetscCall(VecDestroy(&vA)); 2200 PetscCall(VecDestroy(&vB)); 2201 PetscFunctionReturn(PETSC_SUCCESS); 2202 } 2203 2204 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2205 { 2206 
Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2207 PetscInt m = A->rmap->n, n = A->cmap->n; 2208 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2209 PetscInt *cmap = mat->garray; 2210 PetscInt *diagIdx, *offdiagIdx; 2211 Vec diagV, offdiagV; 2212 PetscScalar *a, *diagA, *offdiagA; 2213 const PetscScalar *ba, *bav; 2214 PetscInt r, j, col, ncols, *bi, *bj; 2215 Mat B = mat->B; 2216 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2217 2218 PetscFunctionBegin; 2219 /* When a process holds entire A and other processes have no entry */ 2220 if (A->cmap->N == n) { 2221 PetscCall(VecGetArrayWrite(v, &diagA)); 2222 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2223 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2224 PetscCall(VecDestroy(&diagV)); 2225 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2226 PetscFunctionReturn(PETSC_SUCCESS); 2227 } else if (n == 0) { 2228 if (m) { 2229 PetscCall(VecGetArrayWrite(v, &a)); 2230 for (r = 0; r < m; r++) { 2231 a[r] = 0.0; 2232 if (idx) idx[r] = -1; 2233 } 2234 PetscCall(VecRestoreArrayWrite(v, &a)); 2235 } 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2240 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2241 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2242 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2243 2244 /* Get offdiagIdx[] for implicit 0.0 */ 2245 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2246 ba = bav; 2247 bi = b->i; 2248 bj = b->j; 2249 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2250 for (r = 0; r < m; r++) { 2251 ncols = bi[r + 1] - bi[r]; 2252 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2253 offdiagA[r] = *ba; 2254 offdiagIdx[r] = cmap[0]; 2255 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2256 offdiagA[r] = 0.0; 2257 2258 /* Find first hole in the cmap */ 2259 for (j = 0; j < ncols; j++) { 2260 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2261 if 
(col > j && j < cstart) { 2262 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2263 break; 2264 } else if (col > j + n && j >= cstart) { 2265 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2266 break; 2267 } 2268 } 2269 if (j == ncols && ncols < A->cmap->N - n) { 2270 /* a hole is outside compressed Bcols */ 2271 if (ncols == 0) { 2272 if (cstart) { 2273 offdiagIdx[r] = 0; 2274 } else offdiagIdx[r] = cend; 2275 } else { /* ncols > 0 */ 2276 offdiagIdx[r] = cmap[ncols - 1] + 1; 2277 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2278 } 2279 } 2280 } 2281 2282 for (j = 0; j < ncols; j++) { 2283 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2284 offdiagA[r] = *ba; 2285 offdiagIdx[r] = cmap[*bj]; 2286 } 2287 ba++; 2288 bj++; 2289 } 2290 } 2291 2292 PetscCall(VecGetArrayWrite(v, &a)); 2293 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2294 for (r = 0; r < m; ++r) { 2295 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2296 a[r] = diagA[r]; 2297 if (idx) idx[r] = cstart + diagIdx[r]; 2298 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2299 a[r] = diagA[r]; 2300 if (idx) { 2301 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2302 idx[r] = cstart + diagIdx[r]; 2303 } else idx[r] = offdiagIdx[r]; 2304 } 2305 } else { 2306 a[r] = offdiagA[r]; 2307 if (idx) idx[r] = offdiagIdx[r]; 2308 } 2309 } 2310 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2311 PetscCall(VecRestoreArrayWrite(v, &a)); 2312 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2313 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2314 PetscCall(VecDestroy(&diagV)); 2315 PetscCall(VecDestroy(&offdiagV)); 2316 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2317 PetscFunctionReturn(PETSC_SUCCESS); 2318 } 2319 2320 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2321 { 2322 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2323 PetscInt m = A->rmap->n, n = A->cmap->n; 2324 
PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* no local columns: every row minimum is the identity of min() */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal and off-diagonal minima; on a tie the smaller global column wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetRowMax_MPIAIJ - For each local row, returns in v the entry of largest real part and
   (optionally) its global column in idx[].  Mirrors MatGetRowMin_MPIAIJ with the comparison
   reversed.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* no local columns: every row maximum is the identity of max() */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] =
j + n; /* global column number of first implicit 0.0 */ 2498 break; 2499 } 2500 } 2501 if (j == ncols && ncols < A->cmap->N - n) { 2502 /* a hole is outside compressed Bcols */ 2503 if (ncols == 0) { 2504 if (cstart) { 2505 offdiagIdx[r] = 0; 2506 } else offdiagIdx[r] = cend; 2507 } else { /* ncols > 0 */ 2508 offdiagIdx[r] = cmap[ncols - 1] + 1; 2509 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2510 } 2511 } 2512 } 2513 2514 for (j = 0; j < ncols; j++) { 2515 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2516 offdiagA[r] = *ba; 2517 offdiagIdx[r] = cmap[*bj]; 2518 } 2519 ba++; 2520 bj++; 2521 } 2522 } 2523 2524 PetscCall(VecGetArrayWrite(v, &a)); 2525 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2526 for (r = 0; r < m; ++r) { 2527 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2528 a[r] = diagA[r]; 2529 if (idx) idx[r] = cstart + diagIdx[r]; 2530 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2531 a[r] = diagA[r]; 2532 if (idx) { 2533 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2534 idx[r] = cstart + diagIdx[r]; 2535 } else idx[r] = offdiagIdx[r]; 2536 } 2537 } else { 2538 a[r] = offdiagA[r]; 2539 if (idx) idx[r] = offdiagIdx[r]; 2540 } 2541 } 2542 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2543 PetscCall(VecRestoreArrayWrite(v, &a)); 2544 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2545 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2546 PetscCall(VecDestroy(&diagV)); 2547 PetscCall(VecDestroy(&offdiagV)); 2548 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2549 PetscFunctionReturn(PETSC_SUCCESS); 2550 } 2551 2552 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2553 { 2554 Mat *dummy; 2555 2556 PetscFunctionBegin; 2557 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2558 *newmat = *dummy; 2559 PetscCall(PetscFree(dummy)); 2560 PetscFunctionReturn(PETSC_SUCCESS); 2561 } 2562 2563 PetscErrorCode 
MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2566 2567 PetscFunctionBegin; 2568 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2569 A->factorerrortype = a->A->factorerrortype; 2570 PetscFunctionReturn(PETSC_SUCCESS); 2571 } 2572 2573 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2574 { 2575 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2576 2577 PetscFunctionBegin; 2578 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2579 PetscCall(MatSetRandom(aij->A, rctx)); 2580 if (x->assembled) { 2581 PetscCall(MatSetRandom(aij->B, rctx)); 2582 } else { 2583 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2584 } 2585 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2586 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2587 PetscFunctionReturn(PETSC_SUCCESS); 2588 } 2589 2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2591 { 2592 PetscFunctionBegin; 2593 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2594 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2595 PetscFunctionReturn(PETSC_SUCCESS); 2596 } 2597 2598 /*@ 2599 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2600 2601 Not Collective 2602 2603 Input Parameter: 2604 . A - the matrix 2605 2606 Output Parameter: 2607 . 
nz - the number of nonzeros 2608 2609 Level: advanced 2610 2611 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `Mat` 2612 @*/ 2613 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2614 { 2615 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2616 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2617 PetscBool isaij; 2618 2619 PetscFunctionBegin; 2620 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2621 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2622 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2623 PetscFunctionReturn(PETSC_SUCCESS); 2624 } 2625 2626 /*@ 2627 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2628 2629 Collective 2630 2631 Input Parameters: 2632 + A - the matrix 2633 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2634 2635 Level: advanced 2636 2637 .seealso: [](chapter_matrices), `Mat`, `Mat`, `MATMPIAIJ` 2638 @*/ 2639 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2640 { 2641 PetscFunctionBegin; 2642 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2643 PetscFunctionReturn(PETSC_SUCCESS); 2644 } 2645 2646 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2647 { 2648 PetscBool sc = PETSC_FALSE, flg; 2649 2650 PetscFunctionBegin; 2651 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2652 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2653 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2654 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2655 PetscOptionsHeadEnd(); 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 
2658 2659 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2660 { 2661 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2662 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2663 2664 PetscFunctionBegin; 2665 if (!Y->preallocated) { 2666 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2667 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2668 PetscInt nonew = aij->nonew; 2669 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2670 aij->nonew = nonew; 2671 } 2672 PetscCall(MatShift_Basic(Y, a)); 2673 PetscFunctionReturn(PETSC_SUCCESS); 2674 } 2675 2676 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2677 { 2678 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2679 2680 PetscFunctionBegin; 2681 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2682 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2683 if (d) { 2684 PetscInt rstart; 2685 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2686 *d += rstart; 2687 } 2688 PetscFunctionReturn(PETSC_SUCCESS); 2689 } 2690 2691 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2694 2695 PetscFunctionBegin; 2696 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2697 PetscFunctionReturn(PETSC_SUCCESS); 2698 } 2699 2700 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2701 { 2702 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2703 2704 PetscFunctionBegin; 2705 PetscCall(MatEliminateZeros(a->A)); 2706 PetscCall(MatEliminateZeros(a->B)); 2707 PetscFunctionReturn(PETSC_SUCCESS); 2708 } 2709 2710 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2711 MatGetRow_MPIAIJ, 2712 MatRestoreRow_MPIAIJ, 2713 MatMult_MPIAIJ, 2714 /* 4*/ MatMultAdd_MPIAIJ, 2715 MatMultTranspose_MPIAIJ, 2716 MatMultTransposeAdd_MPIAIJ, 2717 NULL, 
2718 NULL, 2719 NULL, 2720 /*10*/ NULL, 2721 NULL, 2722 NULL, 2723 MatSOR_MPIAIJ, 2724 MatTranspose_MPIAIJ, 2725 /*15*/ MatGetInfo_MPIAIJ, 2726 MatEqual_MPIAIJ, 2727 MatGetDiagonal_MPIAIJ, 2728 MatDiagonalScale_MPIAIJ, 2729 MatNorm_MPIAIJ, 2730 /*20*/ MatAssemblyBegin_MPIAIJ, 2731 MatAssemblyEnd_MPIAIJ, 2732 MatSetOption_MPIAIJ, 2733 MatZeroEntries_MPIAIJ, 2734 /*24*/ MatZeroRows_MPIAIJ, 2735 NULL, 2736 NULL, 2737 NULL, 2738 NULL, 2739 /*29*/ MatSetUp_MPI_Hash, 2740 NULL, 2741 NULL, 2742 MatGetDiagonalBlock_MPIAIJ, 2743 NULL, 2744 /*34*/ MatDuplicate_MPIAIJ, 2745 NULL, 2746 NULL, 2747 NULL, 2748 NULL, 2749 /*39*/ MatAXPY_MPIAIJ, 2750 MatCreateSubMatrices_MPIAIJ, 2751 MatIncreaseOverlap_MPIAIJ, 2752 MatGetValues_MPIAIJ, 2753 MatCopy_MPIAIJ, 2754 /*44*/ MatGetRowMax_MPIAIJ, 2755 MatScale_MPIAIJ, 2756 MatShift_MPIAIJ, 2757 MatDiagonalSet_MPIAIJ, 2758 MatZeroRowsColumns_MPIAIJ, 2759 /*49*/ MatSetRandom_MPIAIJ, 2760 MatGetRowIJ_MPIAIJ, 2761 MatRestoreRowIJ_MPIAIJ, 2762 NULL, 2763 NULL, 2764 /*54*/ MatFDColoringCreate_MPIXAIJ, 2765 NULL, 2766 MatSetUnfactored_MPIAIJ, 2767 MatPermute_MPIAIJ, 2768 NULL, 2769 /*59*/ MatCreateSubMatrix_MPIAIJ, 2770 MatDestroy_MPIAIJ, 2771 MatView_MPIAIJ, 2772 NULL, 2773 NULL, 2774 /*64*/ NULL, 2775 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 NULL, 2779 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2780 MatGetRowMinAbs_MPIAIJ, 2781 NULL, 2782 NULL, 2783 NULL, 2784 NULL, 2785 /*75*/ MatFDColoringApply_AIJ, 2786 MatSetFromOptions_MPIAIJ, 2787 NULL, 2788 NULL, 2789 MatFindZeroDiagonals_MPIAIJ, 2790 /*80*/ NULL, 2791 NULL, 2792 NULL, 2793 /*83*/ MatLoad_MPIAIJ, 2794 MatIsSymmetric_MPIAIJ, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*89*/ NULL, 2800 NULL, 2801 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2802 NULL, 2803 NULL, 2804 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2805 NULL, 2806 NULL, 2807 NULL, 2808 MatBindToCPU_MPIAIJ, 2809 /*99*/ MatProductSetFromOptions_MPIAIJ, 2810 NULL, 2811 NULL, 2812 MatConjugate_MPIAIJ, 2813 NULL, 2814 /*104*/ 
MatSetValuesRow_MPIAIJ, 2815 MatRealPart_MPIAIJ, 2816 MatImaginaryPart_MPIAIJ, 2817 NULL, 2818 NULL, 2819 /*109*/ NULL, 2820 NULL, 2821 MatGetRowMin_MPIAIJ, 2822 NULL, 2823 MatMissingDiagonal_MPIAIJ, 2824 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2825 NULL, 2826 MatGetGhosts_MPIAIJ, 2827 NULL, 2828 NULL, 2829 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2830 NULL, 2831 NULL, 2832 NULL, 2833 MatGetMultiProcBlock_MPIAIJ, 2834 /*124*/ MatFindNonzeroRows_MPIAIJ, 2835 MatGetColumnReductions_MPIAIJ, 2836 MatInvertBlockDiagonal_MPIAIJ, 2837 MatInvertVariableBlockDiagonal_MPIAIJ, 2838 MatCreateSubMatricesMPI_MPIAIJ, 2839 /*129*/ NULL, 2840 NULL, 2841 NULL, 2842 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2843 NULL, 2844 /*134*/ NULL, 2845 NULL, 2846 NULL, 2847 NULL, 2848 NULL, 2849 /*139*/ MatSetBlockSizes_MPIAIJ, 2850 NULL, 2851 NULL, 2852 MatFDColoringSetUp_MPIXAIJ, 2853 MatFindOffBlockDiagonalEntries_MPIAIJ, 2854 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2855 /*145*/ NULL, 2856 NULL, 2857 NULL, 2858 MatCreateGraph_Simple_AIJ, 2859 NULL, 2860 /*150*/ NULL, 2861 MatEliminateZeros_MPIAIJ}; 2862 2863 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2864 { 2865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2866 2867 PetscFunctionBegin; 2868 PetscCall(MatStoreValues(aij->A)); 2869 PetscCall(MatStoreValues(aij->B)); 2870 PetscFunctionReturn(PETSC_SUCCESS); 2871 } 2872 2873 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2874 { 2875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2876 2877 PetscFunctionBegin; 2878 PetscCall(MatRetrieveValues(aij->A)); 2879 PetscCall(MatRetrieveValues(aij->B)); 2880 PetscFunctionReturn(PETSC_SUCCESS); 2881 } 2882 2883 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2884 { 2885 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2886 PetscMPIInt size; 2887 2888 PetscFunctionBegin; 2889 if (B->hash_active) { 2890 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2891 B->hash_active = 
PETSC_FALSE; 2892 } 2893 PetscCall(PetscLayoutSetUp(B->rmap)); 2894 PetscCall(PetscLayoutSetUp(B->cmap)); 2895 2896 #if defined(PETSC_USE_CTABLE) 2897 PetscCall(PetscHMapIDestroy(&b->colmap)); 2898 #else 2899 PetscCall(PetscFree(b->colmap)); 2900 #endif 2901 PetscCall(PetscFree(b->garray)); 2902 PetscCall(VecDestroy(&b->lvec)); 2903 PetscCall(VecScatterDestroy(&b->Mvctx)); 2904 2905 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2906 PetscCall(MatDestroy(&b->B)); 2907 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2908 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2909 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2910 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2911 2912 PetscCall(MatDestroy(&b->A)); 2913 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2914 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2915 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2916 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2917 2918 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2919 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2920 B->preallocated = PETSC_TRUE; 2921 B->was_assembled = PETSC_FALSE; 2922 B->assembled = PETSC_FALSE; 2923 PetscFunctionReturn(PETSC_SUCCESS); 2924 } 2925 2926 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2927 { 2928 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2929 2930 PetscFunctionBegin; 2931 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2932 PetscCall(PetscLayoutSetUp(B->rmap)); 2933 PetscCall(PetscLayoutSetUp(B->cmap)); 2934 2935 #if defined(PETSC_USE_CTABLE) 2936 PetscCall(PetscHMapIDestroy(&b->colmap)); 2937 #else 2938 PetscCall(PetscFree(b->colmap)); 2939 #endif 2940 PetscCall(PetscFree(b->garray)); 2941 PetscCall(VecDestroy(&b->lvec)); 2942 PetscCall(VecScatterDestroy(&b->Mvctx)); 2943 2944 PetscCall(MatResetPreallocation(b->A)); 2945 PetscCall(MatResetPreallocation(b->B)); 2946 B->preallocated = PETSC_TRUE; 2947 
B->was_assembled = PETSC_FALSE; 2948 B->assembled = PETSC_FALSE; 2949 PetscFunctionReturn(PETSC_SUCCESS); 2950 } 2951 2952 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2953 { 2954 Mat mat; 2955 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2956 2957 PetscFunctionBegin; 2958 *newmat = NULL; 2959 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2960 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2961 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2962 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2963 a = (Mat_MPIAIJ *)mat->data; 2964 2965 mat->factortype = matin->factortype; 2966 mat->assembled = matin->assembled; 2967 mat->insertmode = NOT_SET_VALUES; 2968 mat->preallocated = matin->preallocated; 2969 2970 a->size = oldmat->size; 2971 a->rank = oldmat->rank; 2972 a->donotstash = oldmat->donotstash; 2973 a->roworiented = oldmat->roworiented; 2974 a->rowindices = NULL; 2975 a->rowvalues = NULL; 2976 a->getrowactive = PETSC_FALSE; 2977 2978 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2979 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2980 2981 if (oldmat->colmap) { 2982 #if defined(PETSC_USE_CTABLE) 2983 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2984 #else 2985 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2986 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2987 #endif 2988 } else a->colmap = NULL; 2989 if (oldmat->garray) { 2990 PetscInt len; 2991 len = oldmat->B->cmap->n; 2992 PetscCall(PetscMalloc1(len + 1, &a->garray)); 2993 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2994 } else a->garray = NULL; 2995 2996 /* It may happen MatDuplicate is called with a non-assembled matrix 2997 In fact, MatDuplicate only requires the matrix to be preallocated 2998 This may happen inside a DMCreateMatrix_Shell */ 2999 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3000 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3001 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3002 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3003 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3004 *newmat = mat; 3005 PetscFunctionReturn(PETSC_SUCCESS); 3006 } 3007 3008 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3009 { 3010 PetscBool isbinary, ishdf5; 3011 3012 PetscFunctionBegin; 3013 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3014 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3015 /* force binary viewer to load .info file if it has not yet done so */ 3016 PetscCall(PetscViewerSetUp(viewer)); 3017 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3019 if (isbinary) { 3020 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3021 } else if (ishdf5) { 3022 #if defined(PETSC_HAVE_HDF5) 3023 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3024 #else 3025 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3026 #endif 3027 } else { 3028 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3029 } 3030 PetscFunctionReturn(PETSC_SUCCESS); 3031 } 3032 3033 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3034 { 3035 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3036 PetscInt *rowidxs, *colidxs; 3037 PetscScalar *matvals; 3038 3039 PetscFunctionBegin; 3040 PetscCall(PetscViewerSetUp(viewer)); 3041 3042 /* read in matrix header */ 3043 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3044 PetscCheck(header[0] 
== MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3045 M = header[1]; 3046 N = header[2]; 3047 nz = header[3]; 3048 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3049 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3050 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3051 3052 /* set block sizes from the viewer's .info file */ 3053 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3054 /* set global sizes if not set already */ 3055 if (mat->rmap->N < 0) mat->rmap->N = M; 3056 if (mat->cmap->N < 0) mat->cmap->N = N; 3057 PetscCall(PetscLayoutSetUp(mat->rmap)); 3058 PetscCall(PetscLayoutSetUp(mat->cmap)); 3059 3060 /* check if the matrix sizes are correct */ 3061 PetscCall(MatGetSize(mat, &rows, &cols)); 3062 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3063 3064 /* read in row lengths and build row indices */ 3065 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3066 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3067 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3068 rowidxs[0] = 0; 3069 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3070 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3071 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3072 /* read in column indices and matrix values */ 
3073 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3074 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3075 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3076 /* store matrix indices and values */ 3077 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3078 PetscCall(PetscFree(rowidxs)); 3079 PetscCall(PetscFree2(colidxs, matvals)); 3080 PetscFunctionReturn(PETSC_SUCCESS); 3081 } 3082 3083 /* Not scalable because of ISAllGather() unless getting all columns. */ 3084 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3085 { 3086 IS iscol_local; 3087 PetscBool isstride; 3088 PetscMPIInt lisstride = 0, gisstride; 3089 3090 PetscFunctionBegin; 3091 /* check if we are grabbing all columns*/ 3092 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3093 3094 if (isstride) { 3095 PetscInt start, len, mstart, mlen; 3096 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3097 PetscCall(ISGetLocalSize(iscol, &len)); 3098 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3099 if (mstart == start && mlen - mstart == len) lisstride = 1; 3100 } 3101 3102 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3103 if (gisstride) { 3104 PetscInt N; 3105 PetscCall(MatGetSize(mat, NULL, &N)); 3106 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3107 PetscCall(ISSetIdentity(iscol_local)); 3108 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3109 } else { 3110 PetscInt cbs; 3111 PetscCall(ISGetBlockSize(iscol, &cbs)); 3112 PetscCall(ISAllGather(iscol, &iscol_local)); 3113 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3114 } 3115 3116 *isseq = iscol_local; 3117 PetscFunctionReturn(PETSC_SUCCESS); 3118 } 3119 3120 /* 3121 Used by 
MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3122 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3123 3124 Input Parameters: 3125 + mat - matrix 3126 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3127 i.e., mat->rstart <= isrow[i] < mat->rend 3128 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3129 i.e., mat->cstart <= iscol[i] < mat->cend 3130 3131 Output Parameters: 3132 + isrow_d - sequential row index set for retrieving mat->A 3133 . iscol_d - sequential column index set for retrieving mat->A 3134 . iscol_o - sequential column index set for retrieving mat->B 3135 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3136 */ 3137 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3138 { 3139 Vec x, cmap; 3140 const PetscInt *is_idx; 3141 PetscScalar *xarray, *cmaparray; 3142 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3144 Mat B = a->B; 3145 Vec lvec = a->lvec, lcmap; 3146 PetscInt i, cstart, cend, Bn = B->cmap->N; 3147 MPI_Comm comm; 3148 VecScatter Mvctx = a->Mvctx; 3149 3150 PetscFunctionBegin; 3151 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3152 PetscCall(ISGetLocalSize(iscol, &ncols)); 3153 3154 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3155 PetscCall(MatCreateVecs(mat, &x, NULL)); 3156 PetscCall(VecSet(x, -1.0)); 3157 PetscCall(VecDuplicate(x, &cmap)); 3158 PetscCall(VecSet(cmap, -1.0)); 3159 3160 /* Get start indices */ 3161 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3162 isstart -= ncols; 3163 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3164 3165 PetscCall(ISGetIndices(iscol, &is_idx)); 3166 PetscCall(VecGetArray(x, &xarray)); 3167 PetscCall(VecGetArray(cmap, &cmaparray)); 3168 PetscCall(PetscMalloc1(ncols, &idx)); 3169 for (i = 0; i < ncols; i++) { 3170 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3171 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3172 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3173 } 3174 PetscCall(VecRestoreArray(x, &xarray)); 3175 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3176 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3177 3178 /* Get iscol_d */ 3179 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3180 PetscCall(ISGetBlockSize(iscol, &i)); 3181 PetscCall(ISSetBlockSize(*iscol_d, i)); 3182 3183 /* Get isrow_d */ 3184 PetscCall(ISGetLocalSize(isrow, &m)); 3185 rstart = mat->rmap->rstart; 3186 PetscCall(PetscMalloc1(m, &idx)); 3187 PetscCall(ISGetIndices(isrow, &is_idx)); 3188 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3189 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3190 3191 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3192 PetscCall(ISGetBlockSize(isrow, &i)); 3193 PetscCall(ISSetBlockSize(*isrow_d, i)); 3194 3195 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3196 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3197 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3198 3199 PetscCall(VecDuplicate(lvec, &lcmap)); 3200 3201 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3202 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3203 3204 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3205 /* off-process column indices */ 3206 count = 0; 3207 PetscCall(PetscMalloc1(Bn, &idx)); 3208 PetscCall(PetscMalloc1(Bn, &cmap1)); 3209 3210 PetscCall(VecGetArray(lvec, &xarray)); 3211 PetscCall(VecGetArray(lcmap, &cmaparray)); 3212 for (i = 0; i < Bn; i++) { 3213 if (PetscRealPart(xarray[i]) > -1.0) { 3214 idx[count] = i; /* local column index in off-diagonal part B */ 3215 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3216 count++; 3217 } 3218 } 3219 PetscCall(VecRestoreArray(lvec, &xarray)); 3220 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3221 3222 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3223 /* cannot ensure iscol_o has same blocksize as iscol! */ 3224 3225 PetscCall(PetscFree(idx)); 3226 *garray = cmap1; 3227 3228 PetscCall(VecDestroy(&x)); 3229 PetscCall(VecDestroy(&cmap)); 3230 PetscCall(VecDestroy(&lcmap)); 3231 PetscFunctionReturn(PETSC_SUCCESS); 3232 } 3233 3234 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3235 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3236 { 3237 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3238 Mat M = NULL; 3239 MPI_Comm comm; 3240 IS iscol_d, isrow_d, iscol_o; 3241 Mat Asub = NULL, Bsub = NULL; 3242 PetscInt n; 3243 3244 PetscFunctionBegin; 3245 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3246 3247 if (call == MAT_REUSE_MATRIX) { 3248 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3249 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3250 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3251 3252 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3253 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3254 3255 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3256 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3257 3258 /* Update diagonal and off-diagonal portions of submat */ 3259 asub = (Mat_MPIAIJ *)(*submat)->data; 3260 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3261 PetscCall(ISGetLocalSize(iscol_o, &n)); 3262 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3263 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3264 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3265 3266 } else { /* call == MAT_INITIAL_MATRIX) */ 3267 const PetscInt *garray; 3268 PetscInt BsubN; 3269 3270 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3271 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3272 3273 /* Create local submatrices Asub and Bsub */ 3274 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3275 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3276 3277 /* Create submatrix M */ 3278 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3279 3280 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3281 asub = (Mat_MPIAIJ *)M->data; 3282 3283 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3284 n = asub->B->cmap->N; 3285 if (BsubN > n) { 3286 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3287 const PetscInt *idx; 3288 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3289 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3290 3291 PetscCall(PetscMalloc1(n, &idx_new)); 3292 j = 0; 3293 PetscCall(ISGetIndices(iscol_o, &idx)); 3294 for (i = 0; i < n; i++) { 3295 if (j >= BsubN) break; 3296 while (subgarray[i] > garray[j]) j++; 3297 3298 if (subgarray[i] == garray[j]) { 3299 idx_new[i] = idx[j++]; 3300 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3301 } 3302 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3303 3304 PetscCall(ISDestroy(&iscol_o)); 3305 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3306 3307 } else if (BsubN < n) { 3308 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3309 } 3310 3311 PetscCall(PetscFree(garray)); 3312 *submat = M; 3313 3314 /* Save isrow_d, 
  iscol_d and iscol_o used in processor for next request */
    /* PetscObjectCompose() takes a reference on each IS, so the local references can be dropped;
       the composed objects are queried on the next MAT_REUSE_MATRIX call */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Create the submatrix mat[isrow, iscol]. Dispatches to a fast path when isrow (and
   possibly iscol) has the same processor distribution as mat, and otherwise falls back
   to the general gather-based MatCreateSubMatrix_MPIAIJ_nonscalable() */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Which fast path (if any) was taken on the MAT_INITIAL_MATRIX call is recoverable
       from the IS objects composed with the previous submatrix */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat; an empty local IS
       is compatible with any distribution */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the fast paths are collective, so all ranks must agree (MPI_LAND) before taking one */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* if not sorted, fall through to the general case below; iscol_local is reused there */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* attach iscol_local to the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. A - "diagonal" portion of matrix
. B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: local diagonal widths summed over the communicator */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* convert B's compact (local) column indices to global indices, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* everything structural was saved on the previous MAT_INITIAL_MATRIX call; just recover it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; since iscol_local and garray are both ascending,
             the search cursor k only ever moves forward */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart, rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat, translating Msub's column indices through cmap */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart, rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate and fill B from local CSR arrays (Ii, J, v); implementation behind
   MatMPIAIJSetPreallocationCSR() for the MPIAIJ type */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* debug-only validation: row counts non-negative, column indices in range
       (rows are assumed sorted, so checking the first and last entry suffices) */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block (cstart <= col < cend) and off-diagonal entries */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* all entries are local, so assembly needs no off-process communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal; ld[i] = entries of row i with column < cstart
     (J is advanced row by row, and rows are assumed sorted by column) */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering.. i.e for the following matrix, the input data expected is
  as shown

.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ for MPIAIJ);
     a no-op if the type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
.
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
       submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation; a no-op if the type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4128 4129 Collective 4130 4131 Input Parameters: 4132 + comm - MPI communicator 4133 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4134 . n - This value should be the same as the local size used in creating the 4135 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4136 calculated if N is given) For square matrices n is almost always m. 4137 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4138 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4139 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4140 . j - column indices 4141 - a - optional matrix values 4142 4143 Output Parameter: 4144 . mat - the matrix 4145 4146 Level: intermediate 4147 4148 Notes: 4149 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4150 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4151 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4152 4153 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4154 4155 The format which is used for the sparse matrix input, is equivalent to a 4156 row-major ordering.. 
i.e for the following matrix, the input data expected is 4157 as shown 4158 4159 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4160 .vb 4161 1 0 0 4162 2 0 3 P0 4163 ------- 4164 4 5 6 P1 4165 4166 Process0 [P0] rows_owned=[0,1] 4167 i = {0,1,3} [size = nrow+1 = 2+1] 4168 j = {0,0,2} [size = 3] 4169 v = {1,2,3} [size = 3] 4170 4171 Process1 [P1] rows_owned=[2] 4172 i = {0,3} [size = nrow+1 = 1+1] 4173 j = {0,1,2} [size = 3] 4174 v = {4,5,6} [size = 3] 4175 .ve 4176 4177 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4178 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4179 @*/ 4180 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4181 { 4182 PetscFunctionBegin; 4183 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4184 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4185 PetscCall(MatCreate(comm, mat)); 4186 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4187 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4188 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4189 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4190 PetscFunctionReturn(PETSC_SUCCESS); 4191 } 4192 4193 /*@ 4194 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4195 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4196 from `MatCreateMPIAIJWithArrays()` 4197 4198 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4199 4200 Collective 4201 4202 Input Parameters: 4203 + mat - the matrix 4204 . 
m - number of local rows (Cannot be `PETSC_DECIDE`) 4205 . n - This value should be the same as the local size used in creating the 4206 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4207 calculated if N is given) For square matrices n is almost always m. 4208 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4209 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4210 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4211 . J - column indices 4212 - v - matrix values 4213 4214 Level: deprecated 4215 4216 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4217 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4218 @*/ 4219 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4220 { 4221 PetscInt nnz, i; 4222 PetscBool nooffprocentries; 4223 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4224 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4225 PetscScalar *ad, *ao; 4226 PetscInt ldi, Iii, md; 4227 const PetscInt *Adi = Ad->i; 4228 PetscInt *ld = Aij->ld; 4229 4230 PetscFunctionBegin; 4231 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4232 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4233 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4234 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to 
MatUpdateMPIAIJWithArrays()"); 4235 4236 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4237 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4238 4239 for (i = 0; i < m; i++) { 4240 nnz = Ii[i + 1] - Ii[i]; 4241 Iii = Ii[i]; 4242 ldi = ld[i]; 4243 md = Adi[i + 1] - Adi[i]; 4244 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4245 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4246 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4247 ad += md; 4248 ao += nnz - md; 4249 } 4250 nooffprocentries = mat->nooffprocentries; 4251 mat->nooffprocentries = PETSC_TRUE; 4252 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4253 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4254 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4255 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4256 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4257 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4258 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4259 mat->nooffprocentries = nooffprocentries; 4260 PetscFunctionReturn(PETSC_SUCCESS); 4261 } 4262 4263 /*@ 4264 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4265 4266 Collective 4267 4268 Input Parameters: 4269 + mat - the matrix 4270 - v - matrix values, stored by row 4271 4272 Level: intermediate 4273 4274 Note: 4275 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4276 4277 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4278 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4279 @*/ 4280 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4281 { 4282 PetscInt nnz, i, m; 4283 PetscBool nooffprocentries; 4284 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4285 
Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4286 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4287 PetscScalar *ad, *ao; 4288 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4289 PetscInt ldi, Iii, md; 4290 PetscInt *ld = Aij->ld; 4291 4292 PetscFunctionBegin; 4293 m = mat->rmap->n; 4294 4295 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4296 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4297 Iii = 0; 4298 for (i = 0; i < m; i++) { 4299 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4300 ldi = ld[i]; 4301 md = Adi[i + 1] - Adi[i]; 4302 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4303 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4304 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4305 ad += md; 4306 ao += nnz - md; 4307 Iii += nnz; 4308 } 4309 nooffprocentries = mat->nooffprocentries; 4310 mat->nooffprocentries = PETSC_TRUE; 4311 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4312 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4313 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4314 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4315 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4316 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4317 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4318 mat->nooffprocentries = nooffprocentries; 4319 PetscFunctionReturn(PETSC_SUCCESS); 4320 } 4321 4322 /*@C 4323 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4324 (the default parallel PETSc format). For good matrix assembly performance 4325 the user should preallocate the matrix storage by setting the parameters 4326 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4327 4328 Collective 4329 4330 Input Parameters: 4331 + comm - MPI communicator 4332 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4333 This value should be the same as the local size used in creating the 4334 y vector for the matrix-vector product y = Ax. 4335 . 
n - This value should be the same as the local size used in creating the 4336 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4337 calculated if N is given) For square matrices n is almost always m. 4338 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4339 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4340 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4341 (same value is used for all local rows) 4342 . d_nnz - array containing the number of nonzeros in the various rows of the 4343 DIAGONAL portion of the local submatrix (possibly different for each row) 4344 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4345 The size of this array is equal to the number of local rows, i.e 'm'. 4346 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4347 submatrix (same value is used for all local rows). 4348 - o_nnz - array containing the number of nonzeros in the various rows of the 4349 OFF-DIAGONAL portion of the local submatrix (possibly different for 4350 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4351 structure. The size of this array is equal to the number 4352 of local rows, i.e 'm'. 4353 4354 Output Parameter: 4355 . A - the matrix 4356 4357 Options Database Keys: 4358 + -mat_no_inode - Do not use inodes 4359 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4360 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4361 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4362 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 
4363 4364 Level: intermediate 4365 4366 Notes: 4367 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4368 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4369 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4370 4371 If the *_nnz parameter is given then the *_nz parameter is ignored 4372 4373 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4374 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4375 storage requirements for this matrix. 4376 4377 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4378 processor than it must be used on all processors that share the object for 4379 that argument. 4380 4381 The user MUST specify either the local or global matrix dimensions 4382 (possibly both). 4383 4384 The parallel matrix is partitioned across processors such that the 4385 first m0 rows belong to process 0, the next m1 rows belong to 4386 process 1, the next m2 rows belong to process 2 etc.. where 4387 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4388 values corresponding to [m x N] submatrix. 4389 4390 The columns are logically partitioned with the n0 columns belonging 4391 to 0th partition, the next n1 columns belonging to the next 4392 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4393 4394 The DIAGONAL portion of the local submatrix on any given processor 4395 is the submatrix corresponding to the rows and columns m,n 4396 corresponding to the given processor. i.e diagonal matrix on 4397 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4398 etc. The remaining portion of the local submatrix [m x (N-n)] 4399 constitute the OFF-DIAGONAL portion. The example below better 4400 illustrates this concept. 
4401 4402 For a square global matrix we define each processor's diagonal portion 4403 to be its local rows and the corresponding columns (a square submatrix); 4404 each processor's off-diagonal portion encompasses the remainder of the 4405 local matrix (a rectangular submatrix). 4406 4407 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4408 4409 When calling this routine with a single process communicator, a matrix of 4410 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4411 type of communicator, use the construction mechanism 4412 .vb 4413 MatCreate(...,&A); 4414 MatSetType(A,MATMPIAIJ); 4415 MatSetSizes(A, m,n,M,N); 4416 MatMPIAIJSetPreallocation(A,...); 4417 .ve 4418 4419 By default, this format uses inodes (identical nodes) when possible. 4420 We search for consecutive rows with the same nonzero structure, thereby 4421 reusing matrix information to achieve increased efficiency. 4422 4423 Usage: 4424 Consider the following 8x8 matrix with 34 non-zero values, that is 4425 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4426 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4427 as follows 4428 4429 .vb 4430 1 2 0 | 0 3 0 | 0 4 4431 Proc0 0 5 6 | 7 0 0 | 8 0 4432 9 0 10 | 11 0 0 | 12 0 4433 ------------------------------------- 4434 13 0 14 | 15 16 17 | 0 0 4435 Proc1 0 18 0 | 19 20 21 | 0 0 4436 0 0 0 | 22 23 0 | 24 0 4437 ------------------------------------- 4438 Proc2 25 26 27 | 0 0 28 | 29 0 4439 30 0 0 | 31 32 33 | 0 34 4440 .ve 4441 4442 This can be represented as a collection of submatrices as 4443 4444 .vb 4445 A B C 4446 D E F 4447 G H I 4448 .ve 4449 4450 Where the submatrices A,B,C are owned by proc0, D,E,F are 4451 owned by proc1, G,H,I are owned by proc2. 4452 4453 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4454 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
4455 The 'M','N' parameters are 8,8, and have the same values on all procs. 4456 4457 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4458 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4459 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4460 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4461 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4462 matrix, ans [DF] as another SeqAIJ matrix. 4463 4464 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4465 allocated for every row of the local diagonal submatrix, and `o_nz` 4466 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4467 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4468 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4469 In this case, the values of `d_nz`,`o_nz` are 4470 .vb 4471 proc0 dnz = 2, o_nz = 2 4472 proc1 dnz = 3, o_nz = 2 4473 proc2 dnz = 1, o_nz = 4 4474 .ve 4475 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4476 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4477 for proc3. i.e we are using 12+15+10=37 storage locations to store 4478 34 values. 4479 4480 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4481 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4482 In the above case the values for d_nnz,o_nnz are 4483 .vb 4484 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4485 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4486 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4487 .ve 4488 Here the space allocated is sum of all the above values i.e 34, and 4489 hence pre-allocation is perfect. 
4490 4491 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4492 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4493 @*/ 4494 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4495 { 4496 PetscMPIInt size; 4497 4498 PetscFunctionBegin; 4499 PetscCall(MatCreate(comm, A)); 4500 PetscCall(MatSetSizes(*A, m, n, M, N)); 4501 PetscCallMPI(MPI_Comm_size(comm, &size)); 4502 if (size > 1) { 4503 PetscCall(MatSetType(*A, MATMPIAIJ)); 4504 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4505 } else { 4506 PetscCall(MatSetType(*A, MATSEQAIJ)); 4507 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4508 } 4509 PetscFunctionReturn(PETSC_SUCCESS); 4510 } 4511 4512 /*MC 4513 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4514 4515 Synopsis: 4516 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4517 4518 Not Collective 4519 4520 Input Parameter: 4521 . A - the `MATMPIAIJ` matrix 4522 4523 Output Parameters: 4524 + Ad - the diagonal portion of the matrix 4525 . Ao - the off diagonal portion of the matrix 4526 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4527 - ierr - error code 4528 4529 Level: advanced 4530 4531 Note: 4532 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4533 4534 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4535 M*/ 4536 4537 /*MC 4538 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4539 4540 Synopsis: 4541 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4542 4543 Not Collective 4544 4545 Input Parameters: 4546 + A - the `MATMPIAIJ` matrix 4547 . Ad - the diagonal portion of the matrix 4548 . Ao - the off diagonal portion of the matrix 4549 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4550 - ierr - error code 4551 4552 Level: advanced 4553 4554 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4555 M*/ 4556 4557 /*@C 4558 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4559 4560 Not Collective 4561 4562 Input Parameter: 4563 . A - The `MATMPIAIJ` matrix 4564 4565 Output Parameters: 4566 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4567 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4568 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4569 4570 Level: intermediate 4571 4572 Note: 4573 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4574 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4575 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4576 local column numbers to global column numbers in the original matrix. 4577 4578 Fortran Note: 4579 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4580 4581 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4582 @*/ 4583 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4584 { 4585 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4586 PetscBool flg; 4587 4588 PetscFunctionBegin; 4589 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4590 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4591 if (Ad) *Ad = a->A; 4592 if (Ao) *Ao = a->B; 4593 if (colmap) *colmap = a->garray; 4594 PetscFunctionReturn(PETSC_SUCCESS); 4595 } 4596 4597 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4598 { 4599 PetscInt m, N, i, rstart, nnz, Ii; 4600 PetscInt *indx; 4601 PetscScalar *values; 4602 MatType rootType; 4603 4604 PetscFunctionBegin; 4605 PetscCall(MatGetSize(inmat, &m, &N)); 4606 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4607 PetscInt *dnz, *onz, sum, bs, cbs; 4608 4609 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4610 /* Check sum(n) = N */ 4611 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4612 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4613 4614 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4615 rstart -= m; 4616 4617 MatPreallocateBegin(comm, m, n, dnz, onz); 4618 for (i = 0; i < m; i++) { 4619 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4620 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4621 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4622 } 4623 4624 PetscCall(MatCreate(comm, outmat)); 4625 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4626 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4627 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4628 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4629 PetscCall(MatSetType(*outmat, rootType)); 4630 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4631 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4632 MatPreallocateEnd(dnz, onz); 4633 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4634 } 4635 4636 /* numeric phase */ 4637 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4638 for (i = 0; i < m; i++) { 4639 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4640 Ii = i + rstart; 4641 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4642 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4643 } 4644 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4645 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4646 PetscFunctionReturn(PETSC_SUCCESS); 4647 } 4648 4649 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4650 { 4651 PetscMPIInt rank; 4652 PetscInt m, N, i, rstart, nnz; 4653 size_t len; 4654 const PetscInt *indx; 4655 PetscViewer out; 4656 char *name; 4657 Mat B; 4658 const PetscScalar *values; 4659 4660 PetscFunctionBegin; 4661 PetscCall(MatGetLocalSize(A, &m, NULL)); 4662 PetscCall(MatGetSize(A, NULL, &N)); 4663 /* Should this be the type of the diagonal block of A? 
*/ 4664 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4665 PetscCall(MatSetSizes(B, m, N, m, N)); 4666 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4667 PetscCall(MatSetType(B, MATSEQAIJ)); 4668 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4669 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4670 for (i = 0; i < m; i++) { 4671 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4672 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4673 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4674 } 4675 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4676 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4677 4678 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4679 PetscCall(PetscStrlen(outfile, &len)); 4680 PetscCall(PetscMalloc1(len + 6, &name)); 4681 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4682 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4683 PetscCall(PetscFree(name)); 4684 PetscCall(MatView(B, out)); 4685 PetscCall(PetscViewerDestroy(&out)); 4686 PetscCall(MatDestroy(&B)); 4687 PetscFunctionReturn(PETSC_SUCCESS); 4688 } 4689 4690 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4691 { 4692 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4693 4694 PetscFunctionBegin; 4695 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4696 PetscCall(PetscFree(merge->id_r)); 4697 PetscCall(PetscFree(merge->len_s)); 4698 PetscCall(PetscFree(merge->len_r)); 4699 PetscCall(PetscFree(merge->bi)); 4700 PetscCall(PetscFree(merge->bj)); 4701 PetscCall(PetscFree(merge->buf_ri[0])); 4702 PetscCall(PetscFree(merge->buf_ri)); 4703 PetscCall(PetscFree(merge->buf_rj[0])); 4704 PetscCall(PetscFree(merge->buf_rj)); 4705 PetscCall(PetscFree(merge->coi)); 4706 PetscCall(PetscFree(merge->coj)); 4707 PetscCall(PetscFree(merge->owners_co)); 4708 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4709 PetscCall(PetscFree(merge)); 4710 
PetscFunctionReturn(PETSC_SUCCESS); 4711 } 4712 4713 #include <../src/mat/utils/freespace.h> 4714 #include <petscbt.h> 4715 4716 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4717 { 4718 MPI_Comm comm; 4719 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4720 PetscMPIInt size, rank, taga, *len_s; 4721 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4722 PetscInt proc, m; 4723 PetscInt **buf_ri, **buf_rj; 4724 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4725 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4726 MPI_Request *s_waits, *r_waits; 4727 MPI_Status *status; 4728 const MatScalar *aa, *a_a; 4729 MatScalar **abuf_r, *ba_i; 4730 Mat_Merge_SeqsToMPI *merge; 4731 PetscContainer container; 4732 4733 PetscFunctionBegin; 4734 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4735 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4736 4737 PetscCallMPI(MPI_Comm_size(comm, &size)); 4738 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4739 4740 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4741 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4742 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4743 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4744 aa = a_a; 4745 4746 bi = merge->bi; 4747 bj = merge->bj; 4748 buf_ri = merge->buf_ri; 4749 buf_rj = merge->buf_rj; 4750 4751 PetscCall(PetscMalloc1(size, &status)); 4752 owners = merge->rowmap->range; 4753 len_s = merge->len_s; 4754 4755 /* send and recv matrix values */ 4756 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4757 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4758 4759 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4760 for (proc = 0, k = 0; proc < size; proc++) { 4761 if (!len_s[proc]) continue; 4762 i = 
owners[proc]; 4763 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4764 k++; 4765 } 4766 4767 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4768 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4769 PetscCall(PetscFree(status)); 4770 4771 PetscCall(PetscFree(s_waits)); 4772 PetscCall(PetscFree(r_waits)); 4773 4774 /* insert mat values of mpimat */ 4775 PetscCall(PetscMalloc1(N, &ba_i)); 4776 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4777 4778 for (k = 0; k < merge->nrecv; k++) { 4779 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4780 nrows = *(buf_ri_k[k]); 4781 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4782 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4783 } 4784 4785 /* set values of ba */ 4786 m = merge->rowmap->n; 4787 for (i = 0; i < m; i++) { 4788 arow = owners[rank] + i; 4789 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4790 bnzi = bi[i + 1] - bi[i]; 4791 PetscCall(PetscArrayzero(ba_i, bnzi)); 4792 4793 /* add local non-zero vals of this proc's seqmat into ba */ 4794 anzi = ai[arow + 1] - ai[arow]; 4795 aj = a->j + ai[arow]; 4796 aa = a_a + ai[arow]; 4797 nextaj = 0; 4798 for (j = 0; nextaj < anzi; j++) { 4799 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4800 ba_i[j] += aa[nextaj++]; 4801 } 4802 } 4803 4804 /* add received vals into ba */ 4805 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4806 /* i-th row */ 4807 if (i == *nextrow[k]) { 4808 anzi = *(nextai[k] + 1) - *nextai[k]; 4809 aj = buf_rj[k] + *(nextai[k]); 4810 aa = abuf_r[k] + *(nextai[k]); 4811 nextaj = 0; 4812 for (j = 0; nextaj < anzi; j++) { 4813 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4814 ba_i[j] += aa[nextaj++]; 4815 } 4816 } 4817 nextrow[k]++; 4818 nextai[k]++; 
4819 } 4820 } 4821 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4822 } 4823 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4824 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4825 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4826 4827 PetscCall(PetscFree(abuf_r[0])); 4828 PetscCall(PetscFree(abuf_r)); 4829 PetscCall(PetscFree(ba_i)); 4830 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4831 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4832 PetscFunctionReturn(PETSC_SUCCESS); 4833 } 4834 4835 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4836 { 4837 Mat B_mpi; 4838 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4839 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4840 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4841 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4842 PetscInt len, proc, *dnz, *onz, bs, cbs; 4843 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4844 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4845 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4846 MPI_Status *status; 4847 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4848 PetscBT lnkbt; 4849 Mat_Merge_SeqsToMPI *merge; 4850 PetscContainer container; 4851 4852 PetscFunctionBegin; 4853 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4854 4855 /* make sure it is a PETSc comm */ 4856 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4857 PetscCallMPI(MPI_Comm_size(comm, &size)); 4858 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4859 4860 PetscCall(PetscNew(&merge)); 4861 PetscCall(PetscMalloc1(size, &status)); 4862 4863 /* determine row ownership */ 4864 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4865 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4866 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4867 
PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4868 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4869 PetscCall(PetscMalloc1(size, &len_si)); 4870 PetscCall(PetscMalloc1(size, &merge->len_s)); 4871 4872 m = merge->rowmap->n; 4873 owners = merge->rowmap->range; 4874 4875 /* determine the number of messages to send, their lengths */ 4876 len_s = merge->len_s; 4877 4878 len = 0; /* length of buf_si[] */ 4879 merge->nsend = 0; 4880 for (proc = 0; proc < size; proc++) { 4881 len_si[proc] = 0; 4882 if (proc == rank) { 4883 len_s[proc] = 0; 4884 } else { 4885 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4886 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4887 } 4888 if (len_s[proc]) { 4889 merge->nsend++; 4890 nrows = 0; 4891 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4892 if (ai[i + 1] > ai[i]) nrows++; 4893 } 4894 len_si[proc] = 2 * (nrows + 1); 4895 len += len_si[proc]; 4896 } 4897 } 4898 4899 /* determine the number and length of messages to receive for ij-structure */ 4900 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4901 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4902 4903 /* post the Irecv of j-structure */ 4904 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4905 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4906 4907 /* post the Isend of j-structure */ 4908 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4909 4910 for (proc = 0, k = 0; proc < size; proc++) { 4911 if (!len_s[proc]) continue; 4912 i = owners[proc]; 4913 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4914 k++; 4915 } 4916 4917 /* receives and sends of j-structure are complete */ 4918 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4919 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4920 4921 /* send and recv i-structure */ 4922 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4923 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4924 4925 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4926 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4927 for (proc = 0, k = 0; proc < size; proc++) { 4928 if (!len_s[proc]) continue; 4929 /* form outgoing message for i-structure: 4930 buf_si[0]: nrows to be sent 4931 [1:nrows]: row index (global) 4932 [nrows+1:2*nrows+1]: i-structure index 4933 */ 4934 nrows = len_si[proc] / 2 - 1; 4935 buf_si_i = buf_si + nrows + 1; 4936 buf_si[0] = nrows; 4937 buf_si_i[0] = 0; 4938 nrows = 0; 4939 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4940 anzi = ai[i + 1] - ai[i]; 4941 if (anzi) { 4942 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4943 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4944 nrows++; 4945 } 4946 } 4947 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4948 k++; 4949 buf_si += len_si[proc]; 4950 } 4951 4952 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4953 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4954 4955 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4956 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4957 4958 PetscCall(PetscFree(len_si)); 4959 PetscCall(PetscFree(len_ri)); 4960 PetscCall(PetscFree(rj_waits)); 4961 PetscCall(PetscFree2(si_waits, sj_waits)); 4962 PetscCall(PetscFree(ri_waits)); 4963 PetscCall(PetscFree(buf_s)); 4964 PetscCall(PetscFree(status)); 4965 4966 /* compute a local seq matrix in each processor */ 4967 /* allocate bi array and free space for accumulating nonzero column info */ 4968 
PetscCall(PetscMalloc1(m + 1, &bi)); 4969 bi[0] = 0; 4970 4971 /* create and initialize a linked list */ 4972 nlnk = N + 1; 4973 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4974 4975 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4976 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4977 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4978 4979 current_space = free_space; 4980 4981 /* determine symbolic info for each local row */ 4982 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4983 4984 for (k = 0; k < merge->nrecv; k++) { 4985 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4986 nrows = *buf_ri_k[k]; 4987 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4988 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4989 } 4990 4991 MatPreallocateBegin(comm, m, n, dnz, onz); 4992 len = 0; 4993 for (i = 0; i < m; i++) { 4994 bnzi = 0; 4995 /* add local non-zero cols of this proc's seqmat into lnk */ 4996 arow = owners[rank] + i; 4997 anzi = ai[arow + 1] - ai[arow]; 4998 aj = a->j + ai[arow]; 4999 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5000 bnzi += nlnk; 5001 /* add received col data into lnk */ 5002 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5003 if (i == *nextrow[k]) { /* i-th row */ 5004 anzi = *(nextai[k] + 1) - *nextai[k]; 5005 aj = buf_rj[k] + *nextai[k]; 5006 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5007 bnzi += nlnk; 5008 nextrow[k]++; 5009 nextai[k]++; 5010 } 5011 } 5012 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5013 5014 /* if free space is not available, make more free space */ 5015 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5016 /* copy data into free space, then initialize lnk */ 5017 
PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5018 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5019 5020 current_space->array += bnzi; 5021 current_space->local_used += bnzi; 5022 current_space->local_remaining -= bnzi; 5023 5024 bi[i + 1] = bi[i] + bnzi; 5025 } 5026 5027 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5028 5029 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5030 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5031 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5032 5033 /* create symbolic parallel matrix B_mpi */ 5034 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5035 PetscCall(MatCreate(comm, &B_mpi)); 5036 if (n == PETSC_DECIDE) { 5037 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5038 } else { 5039 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5040 } 5041 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5042 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5043 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5044 MatPreallocateEnd(dnz, onz); 5045 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5046 5047 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5048 B_mpi->assembled = PETSC_FALSE; 5049 merge->bi = bi; 5050 merge->bj = bj; 5051 merge->buf_ri = buf_ri; 5052 merge->buf_rj = buf_rj; 5053 merge->coi = NULL; 5054 merge->coj = NULL; 5055 merge->owners_co = NULL; 5056 5057 PetscCall(PetscCommDestroy(&comm)); 5058 5059 /* attach the supporting struct to B_mpi for reuse */ 5060 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5061 PetscCall(PetscContainerSetPointer(container, merge)); 5062 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5063 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5064 PetscCall(PetscContainerDestroy(&container)); 5065 *mpimat = B_mpi; 5066 5067 
PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5068 PetscFunctionReturn(PETSC_SUCCESS); 5069 } 5070 5071 /*@C 5072 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5073 matrices from each processor 5074 5075 Collective 5076 5077 Input Parameters: 5078 + comm - the communicators the parallel matrix will live on 5079 . seqmat - the input sequential matrices 5080 . m - number of local rows (or `PETSC_DECIDE`) 5081 . n - number of local columns (or `PETSC_DECIDE`) 5082 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5083 5084 Output Parameter: 5085 . mpimat - the parallel matrix generated 5086 5087 Level: advanced 5088 5089 Note: 5090 The dimensions of the sequential matrix in each processor MUST be the same. 5091 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5092 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 5093 5094 .seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()` 5095 @*/ 5096 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5097 { 5098 PetscMPIInt size; 5099 5100 PetscFunctionBegin; 5101 PetscCallMPI(MPI_Comm_size(comm, &size)); 5102 if (size == 1) { 5103 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5104 if (scall == MAT_INITIAL_MATRIX) { 5105 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5106 } else { 5107 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5108 } 5109 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5110 PetscFunctionReturn(PETSC_SUCCESS); 5111 } 5112 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5113 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5114 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5115 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5116 PetscFunctionReturn(PETSC_SUCCESS); 
5117 } 5118 5119 /*@ 5120 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with 5121 mlocal rows and n columns. Where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained 5122 with `MatGetSize()` 5123 5124 Not Collective 5125 5126 Input Parameter: 5127 . A - the matrix 5128 5129 Output Parameter: 5130 . A_loc - the local sequential matrix generated 5131 5132 Level: developer 5133 5134 Notes: 5135 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5136 5137 Destroy the matrix with `MatDestroy()` 5138 5139 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5140 @*/ 5141 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5142 { 5143 PetscBool mpi; 5144 5145 PetscFunctionBegin; 5146 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5147 if (mpi) { 5148 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5149 } else { 5150 *A_loc = A; 5151 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5152 } 5153 PetscFunctionReturn(PETSC_SUCCESS); 5154 } 5155 5156 /*@ 5157 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5158 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5159 with `MatGetSize()` 5160 5161 Not Collective 5162 5163 Input Parameters: 5164 + A - the matrix 5165 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5166 5167 Output Parameter: 5168 . A_loc - the local sequential matrix generated 5169 5170 Level: developer 5171 5172 Notes: 5173 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5174 5175 When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`. 5176 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called. 5177 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5178 modify the values of the returned `A_loc`. 5179 5180 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5181 @*/ 5182 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5183 { 5184 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5185 Mat_SeqAIJ *mat, *a, *b; 5186 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5187 const PetscScalar *aa, *ba, *aav, *bav; 5188 PetscScalar *ca, *cam; 5189 PetscMPIInt size; 5190 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5191 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5192 PetscBool match; 5193 5194 PetscFunctionBegin; 5195 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5196 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5197 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5198 if (size == 1) { 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5201 *A_loc = mpimat->A; 5202 } else if (scall == MAT_REUSE_MATRIX) { 5203 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5204 } 5205 PetscFunctionReturn(PETSC_SUCCESS); 5206 } 5207 5208 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5209 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5210 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5211 ai = a->i; 5212 aj = a->j; 5213 bi = b->i; 5214 bj = b->j; 5215 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5216 
PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5217 aa = aav; 5218 ba = bav; 5219 if (scall == MAT_INITIAL_MATRIX) { 5220 PetscCall(PetscMalloc1(1 + am, &ci)); 5221 ci[0] = 0; 5222 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5223 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5224 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5225 k = 0; 5226 for (i = 0; i < am; i++) { 5227 ncols_o = bi[i + 1] - bi[i]; 5228 ncols_d = ai[i + 1] - ai[i]; 5229 /* off-diagonal portion of A */ 5230 for (jo = 0; jo < ncols_o; jo++) { 5231 col = cmap[*bj]; 5232 if (col >= cstart) break; 5233 cj[k] = col; 5234 bj++; 5235 ca[k++] = *ba++; 5236 } 5237 /* diagonal portion of A */ 5238 for (j = 0; j < ncols_d; j++) { 5239 cj[k] = cstart + *aj++; 5240 ca[k++] = *aa++; 5241 } 5242 /* off-diagonal portion of A */ 5243 for (j = jo; j < ncols_o; j++) { 5244 cj[k] = cmap[*bj++]; 5245 ca[k++] = *ba++; 5246 } 5247 } 5248 /* put together the new matrix */ 5249 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5250 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5251 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5252 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5253 mat->free_a = PETSC_TRUE; 5254 mat->free_ij = PETSC_TRUE; 5255 mat->nonew = 0; 5256 } else if (scall == MAT_REUSE_MATRIX) { 5257 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5258 ci = mat->i; 5259 cj = mat->j; 5260 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5261 for (i = 0; i < am; i++) { 5262 /* off-diagonal portion of A */ 5263 ncols_o = bi[i + 1] - bi[i]; 5264 for (jo = 0; jo < ncols_o; jo++) { 5265 col = cmap[*bj]; 5266 if (col >= cstart) break; 5267 *cam++ = *ba++; 5268 bj++; 5269 } 5270 /* diagonal portion of A */ 5271 ncols_d = ai[i + 1] - ai[i]; 5272 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5273 /* off-diagonal portion of A */ 5274 for (j = jo; j < ncols_o; j++) { 5275 *cam++ = *ba++; 5276 bj++; 5277 } 5278 } 5279 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5280 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5281 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5282 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5283 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5284 PetscFunctionReturn(PETSC_SUCCESS); 5285 } 5286 5287 /*@ 5288 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5289 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5290 5291 Not Collective 5292 5293 Input Parameters: 5294 + A - the matrix 5295 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5296 5297 Output Parameters: 5298 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5299 - A_loc - the local sequential matrix generated 5300 5301 Level: developer 5302 5303 Note: 5304 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5305 part, then those associated with the off diagonal part (in its local ordering) 5306 5307 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5308 @*/ 5309 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5310 { 5311 Mat Ao, Ad; 5312 const PetscInt *cmap; 5313 PetscMPIInt size; 5314 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5315 5316 PetscFunctionBegin; 5317 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5318 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5319 if (size == 1) { 5320 if (scall == MAT_INITIAL_MATRIX) { 5321 PetscCall(PetscObjectReference((PetscObject)Ad)); 5322 *A_loc = Ad; 5323 } else if (scall == MAT_REUSE_MATRIX) { 5324 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5325 } 5326 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5327 PetscFunctionReturn(PETSC_SUCCESS); 5328 } 5329 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5330 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5331 if (f) { 5332 PetscCall((*f)(A, scall, glob, A_loc)); 5333 } else { 5334 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5335 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5336 Mat_SeqAIJ *c; 5337 
PetscInt *ai = a->i, *aj = a->j; 5338 PetscInt *bi = b->i, *bj = b->j; 5339 PetscInt *ci, *cj; 5340 const PetscScalar *aa, *ba; 5341 PetscScalar *ca; 5342 PetscInt i, j, am, dn, on; 5343 5344 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5345 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5346 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5347 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5348 if (scall == MAT_INITIAL_MATRIX) { 5349 PetscInt k; 5350 PetscCall(PetscMalloc1(1 + am, &ci)); 5351 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5352 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5353 ci[0] = 0; 5354 for (i = 0, k = 0; i < am; i++) { 5355 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5356 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5357 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5358 /* diagonal portion of A */ 5359 for (j = 0; j < ncols_d; j++, k++) { 5360 cj[k] = *aj++; 5361 ca[k] = *aa++; 5362 } 5363 /* off-diagonal portion of A */ 5364 for (j = 0; j < ncols_o; j++, k++) { 5365 cj[k] = dn + *bj++; 5366 ca[k] = *ba++; 5367 } 5368 } 5369 /* put together the new matrix */ 5370 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5371 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5372 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5373 c = (Mat_SeqAIJ *)(*A_loc)->data; 5374 c->free_a = PETSC_TRUE; 5375 c->free_ij = PETSC_TRUE; 5376 c->nonew = 0; 5377 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5378 } else if (scall == MAT_REUSE_MATRIX) { 5379 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5380 for (i = 0; i < am; i++) { 5381 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5382 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5383 /* diagonal portion of A */ 5384 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5385 /* off-diagonal portion of A */ 5386 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5387 } 5388 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5389 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5390 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5391 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5392 if (glob) { 5393 PetscInt cst, *gidx; 5394 5395 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5396 PetscCall(PetscMalloc1(dn + on, &gidx)); 5397 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5398 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5399 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5400 } 5401 } 5402 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5403 PetscFunctionReturn(PETSC_SUCCESS); 5404 } 5405 5406 /*@C 5407 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5408 5409 Not Collective 5410 5411 Input Parameters: 5412 + A - the matrix 5413 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5414 . row - index set of rows to extract (or `NULL`) 5415 - col - index set of columns to extract (or `NULL`) 5416 5417 Output Parameter: 5418 . 
A_loc - the local sequential matrix generated 5419 5420 Level: developer 5421 5422 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5423 @*/ 5424 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5425 { 5426 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5427 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5428 IS isrowa, iscola; 5429 Mat *aloc; 5430 PetscBool match; 5431 5432 PetscFunctionBegin; 5433 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5434 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5435 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5436 if (!row) { 5437 start = A->rmap->rstart; 5438 end = A->rmap->rend; 5439 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5440 } else { 5441 isrowa = *row; 5442 } 5443 if (!col) { 5444 start = A->cmap->rstart; 5445 cmap = a->garray; 5446 nzA = a->A->cmap->n; 5447 nzB = a->B->cmap->n; 5448 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5449 ncols = 0; 5450 for (i = 0; i < nzB; i++) { 5451 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5452 else break; 5453 } 5454 imark = i; 5455 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5456 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5457 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5458 } else { 5459 iscola = *col; 5460 } 5461 if (scall != MAT_INITIAL_MATRIX) { 5462 PetscCall(PetscMalloc1(1, &aloc)); 5463 aloc[0] = *A_loc; 5464 } 5465 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5466 if (!col) { /* attach global id of condensed columns */ 5467 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5468 } 5469 *A_loc = aloc[0]; 5470 PetscCall(PetscFree(aloc)); 5471 if (!row) PetscCall(ISDestroy(&isrowa)); 5472 if 
(!col) PetscCall(ISDestroy(&iscola)); 5473 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5474 PetscFunctionReturn(PETSC_SUCCESS); 5475 } 5476 5477 /* 5478 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5479 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5480 * on a global size. 5481 * */ 5482 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5483 { 5484 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5485 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5486 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5487 PetscMPIInt owner; 5488 PetscSFNode *iremote, *oiremote; 5489 const PetscInt *lrowindices; 5490 PetscSF sf, osf; 5491 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5492 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5493 MPI_Comm comm; 5494 ISLocalToGlobalMapping mapping; 5495 const PetscScalar *pd_a, *po_a; 5496 5497 PetscFunctionBegin; 5498 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5499 /* plocalsize is the number of roots 5500 * nrows is the number of leaves 5501 * */ 5502 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5503 PetscCall(ISGetLocalSize(rows, &nrows)); 5504 PetscCall(PetscCalloc1(nrows, &iremote)); 5505 PetscCall(ISGetIndices(rows, &lrowindices)); 5506 for (i = 0; i < nrows; i++) { 5507 /* Find a remote index and an owner for a row 5508 * The row could be local or remote 5509 * */ 5510 owner = 0; 5511 lidx = 0; 5512 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5513 iremote[i].index = lidx; 5514 iremote[i].rank = owner; 5515 } 5516 /* Create SF to communicate how many nonzero columns for each row */ 5517 PetscCall(PetscSFCreate(comm, &sf)); 5518 /* SF will figure out the number of nonzero colunms for each row, and their 5519 * offsets 5520 * */ 5521 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5522 PetscCall(PetscSFSetFromOptions(sf)); 5523 PetscCall(PetscSFSetUp(sf)); 5524 5525 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5526 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5527 PetscCall(PetscCalloc1(nrows, &pnnz)); 5528 roffsets[0] = 0; 5529 roffsets[1] = 0; 5530 for (i = 0; i < plocalsize; i++) { 5531 /* diag */ 5532 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5533 /* off diag */ 5534 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5535 /* compute offsets so that we relative location for each row */ 5536 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5537 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5538 } 5539 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5540 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5541 /* 'r' means root, and 'l' means leaf */ 5542 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5543 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5544 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5545 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5546 PetscCall(PetscSFDestroy(&sf)); 5547 PetscCall(PetscFree(roffsets)); 5548 PetscCall(PetscFree(nrcols)); 5549 dntotalcols = 0; 5550 ontotalcols = 0; 5551 ncol = 0; 5552 for (i = 0; i < nrows; i++) { 5553 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5554 ncol = PetscMax(pnnz[i], ncol); 5555 /* diag */ 5556 dntotalcols += nlcols[i * 2 + 0]; 5557 /* off diag */ 5558 ontotalcols += nlcols[i * 2 + 1]; 5559 } 5560 /* We do not need to figure the right number of columns 5561 * since all the calculations will be done by going through the raw data 5562 * */ 5563 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5564 PetscCall(MatSetUp(*P_oth)); 5565 PetscCall(PetscFree(pnnz)); 5566 p_oth = (Mat_SeqAIJ 
*)(*P_oth)->data; 5567 /* diag */ 5568 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5569 /* off diag */ 5570 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5571 /* diag */ 5572 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5573 /* off diag */ 5574 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5575 dntotalcols = 0; 5576 ontotalcols = 0; 5577 ntotalcols = 0; 5578 for (i = 0; i < nrows; i++) { 5579 owner = 0; 5580 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5581 /* Set iremote for diag matrix */ 5582 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5583 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5584 iremote[dntotalcols].rank = owner; 5585 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5586 ilocal[dntotalcols++] = ntotalcols++; 5587 } 5588 /* off diag */ 5589 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5590 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5591 oiremote[ontotalcols].rank = owner; 5592 oilocal[ontotalcols++] = ntotalcols++; 5593 } 5594 } 5595 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5596 PetscCall(PetscFree(loffsets)); 5597 PetscCall(PetscFree(nlcols)); 5598 PetscCall(PetscSFCreate(comm, &sf)); 5599 /* P serves as roots and P_oth is leaves 5600 * Diag matrix 5601 * */ 5602 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5603 PetscCall(PetscSFSetFromOptions(sf)); 5604 PetscCall(PetscSFSetUp(sf)); 5605 5606 PetscCall(PetscSFCreate(comm, &osf)); 5607 /* Off diag */ 5608 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5609 PetscCall(PetscSFSetFromOptions(osf)); 5610 PetscCall(PetscSFSetUp(osf)); 5611 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5612 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5613 /* We operate on the matrix internal data for saving memory */ 5614 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, 
p_oth->a, MPI_REPLACE)); 5615 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5616 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5617 /* Convert to global indices for diag matrix */ 5618 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5619 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5620 /* We want P_oth store global indices */ 5621 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5622 /* Use memory scalable approach */ 5623 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5624 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5625 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5626 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5627 /* Convert back to local indices */ 5628 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5629 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5630 nout = 0; 5631 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5632 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5633 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5634 /* Exchange values */ 5635 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5636 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5637 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5638 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5639 /* Stop PETSc from shrinking memory */ 5640 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5641 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5642 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5643 /* Attach PetscSF objects to P_oth so that 
   we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  /* The composed references keep the SFs alive; drop our local references */
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 *
 * dof is the block factor of the MAIJ matrix: off-diagonal column indices of A are divided
 * by dof to obtain the (unique) rows of P to extract.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        /* First time we see this key: it gets the next compressed index */
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step (garray sorted, so the repeat is the last key inserted) */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that was attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: broadcast P's diag and offdiag values straight into P_oth's value array */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
. colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS: global columns of A = diag columns (local) + offdiag columns (garray),
       kept in ascending order by interleaving garray around the local range */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices expects an array of Mat for MAT_REUSE_MATRIX */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5818 5819 Level: developer 5820 5821 */ 5822 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5823 { 5824 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5825 Mat_SeqAIJ *b_oth; 5826 VecScatter ctx; 5827 MPI_Comm comm; 5828 const PetscMPIInt *rprocs, *sprocs; 5829 const PetscInt *srow, *rstarts, *sstarts; 5830 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5831 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5832 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5833 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5834 PetscMPIInt size, tag, rank, nreqs; 5835 5836 PetscFunctionBegin; 5837 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5838 PetscCallMPI(MPI_Comm_size(comm, &size)); 5839 5840 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5841 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5842 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5843 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5844 5845 if (size == 1) { 5846 startsj_s = NULL; 5847 bufa_ptr = NULL; 5848 *B_oth = NULL; 5849 PetscFunctionReturn(PETSC_SUCCESS); 5850 } 5851 5852 ctx = a->Mvctx; 5853 tag = ((PetscObject)ctx)->tag; 5854 5855 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5856 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5857 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5858 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5859 PetscCall(PetscMalloc1(nreqs, &reqs)); 5860 rwaits = reqs; 5861 swaits = reqs + nrecvs; 5862 5863 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5864 if (scall == MAT_INITIAL_MATRIX) { 5865 /* i-array */ 5866 /* post receives */ 5867 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5868 for (i = 0; i < nrecvs; i++) { 5869 rowlen = rvalues + rstarts[i] * rbs; 5870 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5871 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5872 } 5873 5874 /* pack the outgoing message */ 5875 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5876 5877 sstartsj[0] = 0; 5878 rstartsj[0] = 0; 5879 len = 0; /* total length of j or a array to be sent */ 5880 if (nsends) { 5881 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5882 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5883 } 5884 for (i = 0; i < nsends; i++) { 5885 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5886 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5887 for (j = 0; j < nrows; j++) { 5888 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5889 for (l = 0; l < sbs; l++) { 5890 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5891 5892 rowlen[j * sbs + l] = ncols; 5893 5894 len += ncols; 5895 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5896 } 5897 k++; 5898 } 5899 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5900 5901 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5902 } 5903 /* recvs and sends of i-array are completed */ 5904 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5905 PetscCall(PetscFree(svalues)); 
5906 5907 /* allocate buffers for sending j and a arrays */ 5908 PetscCall(PetscMalloc1(len + 1, &bufj)); 5909 PetscCall(PetscMalloc1(len + 1, &bufa)); 5910 5911 /* create i-array of B_oth */ 5912 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5913 5914 b_othi[0] = 0; 5915 len = 0; /* total length of j or a array to be received */ 5916 k = 0; 5917 for (i = 0; i < nrecvs; i++) { 5918 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5919 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5920 for (j = 0; j < nrows; j++) { 5921 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5922 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5923 k++; 5924 } 5925 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5926 } 5927 PetscCall(PetscFree(rvalues)); 5928 5929 /* allocate space for j and a arrays of B_oth */ 5930 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5931 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5932 5933 /* j-array */ 5934 /* post receives of j-array */ 5935 for (i = 0; i < nrecvs; i++) { 5936 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5937 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5938 } 5939 5940 /* pack the outgoing message j-array */ 5941 if (nsends) k = sstarts[0]; 5942 for (i = 0; i < nsends; i++) { 5943 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5944 bufJ = bufj + sstartsj[i]; 5945 for (j = 0; j < nrows; j++) { 5946 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5947 for (ll = 0; ll < sbs; ll++) { 5948 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5949 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5950 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5951 } 5952 } 5953 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5954 } 5955 5956 /* recvs and sends of j-array are 
completed */ 5957 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5958 } else if (scall == MAT_REUSE_MATRIX) { 5959 sstartsj = *startsj_s; 5960 rstartsj = *startsj_r; 5961 bufa = *bufa_ptr; 5962 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5963 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5964 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5965 5966 /* a-array */ 5967 /* post receives of a-array */ 5968 for (i = 0; i < nrecvs; i++) { 5969 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5970 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5971 } 5972 5973 /* pack the outgoing message a-array */ 5974 if (nsends) k = sstarts[0]; 5975 for (i = 0; i < nsends; i++) { 5976 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5977 bufA = bufa + sstartsj[i]; 5978 for (j = 0; j < nrows; j++) { 5979 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5980 for (ll = 0; ll < sbs; ll++) { 5981 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5982 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5983 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5984 } 5985 } 5986 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5987 } 5988 /* recvs and sends of a-array are completed */ 5989 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5990 PetscCall(PetscFree(reqs)); 5991 5992 if (scall == MAT_INITIAL_MATRIX) { 5993 /* put together the new matrix */ 5994 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5995 5996 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5997 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5998 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5999 b_oth->free_a = PETSC_TRUE; 6000 b_oth->free_ij = PETSC_TRUE; 6001 b_oth->nonew = 0; 6002 6003 PetscCall(PetscFree(bufj)); 6004 if (!startsj_s || !bufa_ptr) { 6005 PetscCall(PetscFree2(sstartsj, rstartsj)); 6006 PetscCall(PetscFree(bufa_ptr)); 6007 } else { 6008 *startsj_s = sstartsj; 6009 *startsj_r = rstartsj; 6010 *bufa_ptr = bufa; 6011 } 6012 } else if (scall == MAT_REUSE_MATRIX) { 6013 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6014 } 6015 6016 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6017 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6018 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6019 PetscFunctionReturn(PETSC_SUCCESS); 6020 } 6021 6022 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6023 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6024 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6025 #if defined(PETSC_HAVE_MKL_SPARSE) 6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6027 #endif 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6030 #if defined(PETSC_HAVE_ELEMENTAL) 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6032 #endif 6033 #if defined(PETSC_HAVE_SCALAPACK) 6034 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6035 #endif 6036 #if defined(PETSC_HAVE_HYPRE) 6037 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6038 #endif 6039 #if defined(PETSC_HAVE_CUDA) 6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6041 #endif 6042 #if defined(PETSC_HAVE_HIP) 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6049 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6050 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6051 6052 /* 6053 Computes (B'*A')' since computing B*A directly is untenable 6054 6055 n p p 6056 [ ] [ ] [ ] 6057 m [ A ] * n [ B ] = m [ C ] 6058 [ ] [ ] [ ] 6059 6060 */ 6061 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6062 { 6063 Mat At, Bt, Ct; 6064 6065 PetscFunctionBegin; 6066 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6067 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6068 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6069 PetscCall(MatDestroy(&At)); 6070 PetscCall(MatDestroy(&Bt)); 6071 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6072 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6073 PetscCall(MatDestroy(&Ct)); 6074 PetscFunctionReturn(PETSC_SUCCESS); 6075 } 6076 6077 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6078 { 6079 PetscBool cisdense; 6080 6081 PetscFunctionBegin; 6082 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6083 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6084 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6085 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6086 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6087 
PetscCall(MatSetUp(C)); 6088 6089 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6090 PetscFunctionReturn(PETSC_SUCCESS); 6091 } 6092 6093 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6094 { 6095 Mat_Product *product = C->product; 6096 Mat A = product->A, B = product->B; 6097 6098 PetscFunctionBegin; 6099 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6100 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6101 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6102 C->ops->productsymbolic = MatProductSymbolic_AB; 6103 PetscFunctionReturn(PETSC_SUCCESS); 6104 } 6105 6106 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6107 { 6108 Mat_Product *product = C->product; 6109 6110 PetscFunctionBegin; 6111 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6112 PetscFunctionReturn(PETSC_SUCCESS); 6113 } 6114 6115 /* 6116 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6117 6118 Input Parameters: 6119 6120 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6121 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6122 6123 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6124 6125 For Set1, j1[] contains column indices of the nonzeros. 6126 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6127 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6128 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6129 6130 Similar for Set2. 

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way sorted merge; repeats within a set are skipped via the jmap run lengths */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer for the merged row */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      /* NOTE(review): 'j[p] <= mat->cmap->N' admits the out-of-range column index N (valid range is [0,N-1]);
         presumably should be '<' — confirm against the global column layout */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Counters are reused as running offsets for the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
   then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    /* Every merged slot strictly above imap[k] inherits the running count after set-entry k */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Slots at or below the smallest mapped index get the base offset */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

static
PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6375 { 6376 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6377 6378 PetscFunctionBegin; 6379 PetscCall(PetscSFDestroy(&coo->sf)); 6380 PetscCall(PetscFree(coo->Aperm1)); 6381 PetscCall(PetscFree(coo->Bperm1)); 6382 PetscCall(PetscFree(coo->Ajmap1)); 6383 PetscCall(PetscFree(coo->Bjmap1)); 6384 PetscCall(PetscFree(coo->Aimap2)); 6385 PetscCall(PetscFree(coo->Bimap2)); 6386 PetscCall(PetscFree(coo->Aperm2)); 6387 PetscCall(PetscFree(coo->Bperm2)); 6388 PetscCall(PetscFree(coo->Ajmap2)); 6389 PetscCall(PetscFree(coo->Bjmap2)); 6390 PetscCall(PetscFree(coo->Cperm1)); 6391 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6392 PetscCall(PetscFree(coo)); 6393 PetscFunctionReturn(PETSC_SUCCESS); 6394 } 6395 6396 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6397 { 6398 MPI_Comm comm; 6399 PetscMPIInt rank, size; 6400 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6401 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6402 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6403 PetscContainer container; 6404 MatCOOStruct_MPIAIJ *coo; 6405 6406 PetscFunctionBegin; 6407 PetscCall(PetscFree(mpiaij->garray)); 6408 PetscCall(VecDestroy(&mpiaij->lvec)); 6409 #if defined(PETSC_USE_CTABLE) 6410 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6411 #else 6412 PetscCall(PetscFree(mpiaij->colmap)); 6413 #endif 6414 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6415 mat->assembled = PETSC_FALSE; 6416 mat->was_assembled = PETSC_FALSE; 6417 6418 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6419 PetscCallMPI(MPI_Comm_size(comm, &size)); 6420 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6421 PetscCall(PetscLayoutSetUp(mat->rmap)); 6422 PetscCall(PetscLayoutSetUp(mat->cmap)); 6423 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6424 PetscCall(PetscLayoutGetRange(mat->cmap, 
&cstart, &cend)); 6425 PetscCall(MatGetLocalSize(mat, &m, &n)); 6426 PetscCall(MatGetSize(mat, &M, &N)); 6427 6428 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6429 /* entries come first, then local rows, then remote rows. */ 6430 PetscCount n1 = coo_n, *perm1; 6431 PetscInt *i1 = coo_i, *j1 = coo_j; 6432 6433 PetscCall(PetscMalloc1(n1, &perm1)); 6434 for (k = 0; k < n1; k++) perm1[k] = k; 6435 6436 /* Manipulate indices so that entries with negative row or col indices will have smallest 6437 row indices, local entries will have greater but negative row indices, and remote entries 6438 will have positive row indices. 6439 */ 6440 for (k = 0; k < n1; k++) { 6441 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6442 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6443 else { 6444 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6445 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6446 } 6447 } 6448 6449 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6450 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6451 for (k = 0; k < n1; k++) { 6452 if (i1[k] > PETSC_MIN_INT) break; 6453 } /* Advance k to the first entry we need to take care of */ 6454 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6455 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6456 6457 /* Split local rows into diag/offdiag portions */ 6458 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6459 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6460 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6461 
6462 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6463 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6464 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6465 6466 /* Send remote rows to their owner */ 6467 /* Find which rows should be sent to which remote ranks*/ 6468 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6469 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6470 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6471 const PetscInt *ranges; 6472 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6473 6474 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6475 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6476 for (k = rem; k < n1;) { 6477 PetscMPIInt owner; 6478 PetscInt firstRow, lastRow; 6479 6480 /* Locate a row range */ 6481 firstRow = i1[k]; /* first row of this owner */ 6482 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6483 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6484 6485 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6486 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6487 6488 /* All entries in [k,p) belong to this remote owner */ 6489 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6490 PetscMPIInt *sendto2; 6491 PetscInt *nentries2; 6492 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6493 6494 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6495 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6496 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6497 PetscCall(PetscFree2(sendto, nentries2)); 6498 sendto = sendto2; 6499 nentries = nentries2; 6500 maxNsend = maxNsend2; 6501 } 6502 sendto[nsend] = owner; 6503 nentries[nsend] = p - k; 6504 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6505 nsend++; 6506 k = p; 6507 } 6508 6509 /* Build 1st SF to know offsets on remote to send data */ 6510 PetscSF sf1; 6511 PetscInt nroots = 1, nroots2 = 0; 6512 PetscInt nleaves = nsend, nleaves2 = 0; 6513 PetscInt *offsets; 6514 PetscSFNode *iremote; 6515 6516 PetscCall(PetscSFCreate(comm, &sf1)); 6517 PetscCall(PetscMalloc1(nsend, &iremote)); 6518 PetscCall(PetscMalloc1(nsend, &offsets)); 6519 for (k = 0; k < nsend; k++) { 6520 iremote[k].rank = sendto[k]; 6521 iremote[k].index = 0; 6522 nleaves2 += nentries[k]; 6523 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6524 } 6525 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6526 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6527 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6528 PetscCall(PetscSFDestroy(&sf1)); 6529 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6530 6531 /* Build 2nd SF to send remote COOs to their owner */ 6532 PetscSF sf2; 6533 nroots = nroots2; 6534 nleaves = nleaves2; 6535 PetscCall(PetscSFCreate(comm, &sf2)); 6536 
PetscCall(PetscSFSetFromOptions(sf2)); 6537 PetscCall(PetscMalloc1(nleaves, &iremote)); 6538 p = 0; 6539 for (k = 0; k < nsend; k++) { 6540 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6541 for (q = 0; q < nentries[k]; q++, p++) { 6542 iremote[p].rank = sendto[k]; 6543 iremote[p].index = offsets[k] + q; 6544 } 6545 } 6546 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6547 6548 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6549 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6550 6551 /* Send the remote COOs to their owner */ 6552 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6553 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6554 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6555 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6556 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6558 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6559 6560 PetscCall(PetscFree(offsets)); 6561 PetscCall(PetscFree2(sendto, nentries)); 6562 6563 /* Sort received COOs by row along with the permutation array */ 6564 for (k = 0; k < n2; k++) perm2[k] = k; 6565 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6566 6567 /* Split received COOs into diag/offdiag portions */ 6568 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6569 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6570 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6571 6572 
PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6573 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6574 6575 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6576 PetscInt *Ai, *Bi; 6577 PetscInt *Aj, *Bj; 6578 6579 PetscCall(PetscMalloc1(m + 1, &Ai)); 6580 PetscCall(PetscMalloc1(m + 1, &Bi)); 6581 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6582 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6583 6584 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6585 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6586 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6587 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6588 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6589 6590 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6591 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6592 6593 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6594 /* expect nonzeros in A/B most likely have local contributing entries */ 6595 PetscInt Annz = Ai[m]; 6596 PetscInt Bnnz = Bi[m]; 6597 PetscCount *Ajmap1_new, *Bjmap1_new; 6598 6599 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6600 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6601 6602 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6603 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6604 6605 PetscCall(PetscFree(Aimap1)); 6606 PetscCall(PetscFree(Ajmap1)); 6607 PetscCall(PetscFree(Bimap1)); 6608 PetscCall(PetscFree(Bjmap1)); 6609 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6610 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6611 PetscCall(PetscFree(perm1)); 6612 
  PetscCall(PetscFree3(i2, j2, perm2));

  Ajmap1 = Ajmap1_new;
  Bjmap1 = Bjmap1_new;

  /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
  if (Annz < Annz1 + Annz2) {
    PetscInt *Aj_new;
    PetscCall(PetscMalloc1(Annz, &Aj_new));
    PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
    PetscCall(PetscFree(Aj));
    Aj = Aj_new;
  }

  if (Bnnz < Bnnz1 + Bnnz2) {
    PetscInt *Bj_new;
    PetscCall(PetscMalloc1(Bnnz, &Bj_new));
    PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
    PetscCall(PetscFree(Bj));
    Bj = Bj_new;
  }

  /* Create new submatrices for on-process and off-process coupling */
  PetscScalar *Aa, *Ba;
  MatType      rtype;
  Mat_SeqAIJ  *a, *b;
  PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
  PetscCall(PetscCalloc1(Bnnz, &Ba));
  /* make Aj[] local, i.e., based off the start column of the diagonal portion */
  if (cstart) {
    for (k = 0; k < Annz; k++) Aj[k] -= cstart;
  }
  PetscCall(MatDestroy(&mpiaij->A));
  PetscCall(MatDestroy(&mpiaij->B));
  PetscCall(MatGetRootType_Private(mat, &rtype));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
  PetscCall(MatSetUpMultiply_MPIAIJ(mat));

  a = (Mat_SeqAIJ *)mpiaij->A->data;
  b = (Mat_SeqAIJ *)mpiaij->B->data;
  a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
  a->free_a = b->free_a = PETSC_TRUE;
  a->free_ij = b->free_ij = PETSC_TRUE;

  /* conversion must happen AFTER multiply setup */
  PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
  PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
  PetscCall(VecDestroy(&mpiaij->lvec));
  PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));

  // Put the COO struct in a container and then attach that to the matrix, so that
  // MatSetValuesCOO_MPIAIJ() below can retrieve the cached maps and buffers
  PetscCall(PetscMalloc1(1, &coo));
  coo->n       = coo_n;
  coo->sf      = sf2;
  coo->sendlen = nleaves;
  coo->recvlen = nroots;
  coo->Annz    = Annz;
  coo->Bnnz    = Bnnz;
  coo->Annz2   = Annz2;
  coo->Bnnz2   = Bnnz2;
  coo->Atot1   = Atot1;
  coo->Atot2   = Atot2;
  coo->Btot1   = Btot1;
  coo->Btot2   = Btot2;
  coo->Ajmap1  = Ajmap1;
  coo->Aperm1  = Aperm1;
  coo->Bjmap1  = Bjmap1;
  coo->Bperm1  = Bperm1;
  coo->Aimap2  = Aimap2;
  coo->Ajmap2  = Ajmap2;
  coo->Aperm2  = Aperm2;
  coo->Bimap2  = Bimap2;
  coo->Bjmap2  = Bjmap2;
  coo->Bperm2  = Bperm2;
  coo->Cperm1  = Cperm1;
  // Allocate in preallocation. If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetValuesCOO_MPIAIJ - insert (or add, per imode) the values v[] at the COO locations
   registered in the preceding preallocation phase.

   The "__PETSc_MatCOOStruct_Host" container attached to the matrix supplies the perm/jmap/imap
   arrays and the send/recv buffers built during preallocation. Off-process values are packed via
   Cperm1 and shipped to their owners with the cached PetscSF; that communication is overlapped
   with the local accumulation into the diagonal (A) and off-diagonal (B) blocks. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  /* Unpack the cached maps built by MatSetPreallocationCOO_MPIAIJ() */
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this no 6776 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6777 6778 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6779 M*/ 6780 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6781 { 6782 Mat_MPIAIJ *b; 6783 PetscMPIInt size; 6784 6785 PetscFunctionBegin; 6786 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6787 6788 PetscCall(PetscNew(&b)); 6789 B->data = (void *)b; 6790 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6791 B->assembled = PETSC_FALSE; 6792 B->insertmode = NOT_SET_VALUES; 6793 b->size = size; 6794 6795 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6796 6797 /* build cache for off array entries formed */ 6798 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6799 6800 b->donotstash = PETSC_FALSE; 6801 b->colmap = NULL; 6802 b->garray = NULL; 6803 b->roworiented = PETSC_TRUE; 6804 6805 /* stuff used for matrix vector multiply */ 6806 b->lvec = NULL; 6807 b->Mvctx = NULL; 6808 6809 /* stuff for MatGetRow() */ 6810 b->rowindices = NULL; 6811 b->rowvalues = NULL; 6812 b->getrowactive = PETSC_FALSE; 6813 6814 /* flexible pointer used in CUSPARSE classes */ 6815 b->spptr = NULL; 6816 6817 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6818 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6819 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6820 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6821 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6822 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", 
MatResetPreallocation_MPIAIJ)); 6823 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6824 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6825 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6826 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6827 #if defined(PETSC_HAVE_CUDA) 6828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6829 #endif 6830 #if defined(PETSC_HAVE_HIP) 6831 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6832 #endif 6833 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6834 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6835 #endif 6836 #if defined(PETSC_HAVE_MKL_SPARSE) 6837 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6838 #endif 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6843 #if defined(PETSC_HAVE_ELEMENTAL) 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6845 #endif 6846 #if defined(PETSC_HAVE_SCALAPACK) 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6848 #endif 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6850 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6851 #if defined(PETSC_HAVE_HYPRE) 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6853 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6854 #endif 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6859 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6860 PetscFunctionReturn(PETSC_SUCCESS); 6861 } 6862 6863 /*@C 6864 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6865 and "off-diagonal" part of the matrix in CSR format. 6866 6867 Collective 6868 6869 Input Parameters: 6870 + comm - MPI communicator 6871 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6872 . n - This value should be the same as the local size used in creating the 6873 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6874 calculated if `N` is given) For square matrices `n` is almost always `m`. 6875 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6876 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6877 . 
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6878 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6879 . a - matrix values 6880 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6881 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6882 - oa - matrix values 6883 6884 Output Parameter: 6885 . mat - the matrix 6886 6887 Level: advanced 6888 6889 Notes: 6890 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6891 must free the arrays once the matrix has been destroyed and not before. 6892 6893 The `i` and `j` indices are 0 based 6894 6895 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6896 6897 This sets local rows and cannot be used to set off-processor values. 6898 6899 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6900 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6901 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6902 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6903 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6904 communication if it is known that only local entries will be set. 

.seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the caller's CSR arrays directly (no copy) as the diag/offdiag sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local entries exist by construction, so suppress off-process communication during assembly */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-product backend data shared by the MPIAIJBACKEND symbolic/numeric phases below */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* MatDestroy_MatMatMPIAIJBACKEND - free all intermediate product matrices, communication
   buffers and index arrays held by a MatMatMPIAIJBACKEND product-data structure */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single allocations that back all per-product index arrays */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (possibly device-aware) implementation when one is registered */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProductNumeric_MPIAIJBACKEND - numeric phase of the backend product: refresh the temporary
   matrices (P_oth, Bloc) when needed, run the numeric phase of every intermediate product, copy
   their values into the COO buffers (coo_v on-process, coo_w off-process), gather off-process
   contributions through the cached PetscSF, and insert everything into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first numeric call after symbolic may reuse; later calls must refresh */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track the running offsets into the on-process (coo_v) and off-process (coo_w) buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later intermediates, not C */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* treat A^t*B as A*B for symmetric A to reuse the AB code path */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
7143 PetscCall(PetscNew(&mmdata)); 7144 mmdata->reusesym = product->api_user; 7145 if (ptype == MATPRODUCT_AB) { 7146 if (product->api_user) { 7147 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7148 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7149 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7150 PetscOptionsEnd(); 7151 } else { 7152 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7153 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7154 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7155 PetscOptionsEnd(); 7156 } 7157 } else if (ptype == MATPRODUCT_PtAP) { 7158 if (product->api_user) { 7159 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7160 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7161 PetscOptionsEnd(); 7162 } else { 7163 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7164 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7165 PetscOptionsEnd(); 7166 } 7167 } 7168 a = (Mat_MPIAIJ *)A->data; 7169 p = (Mat_MPIAIJ *)P->data; 7170 PetscCall(MatSetSizes(C, m, n, M, N)); 7171 PetscCall(PetscLayoutSetUp(C->rmap)); 7172 PetscCall(PetscLayoutSetUp(C->cmap)); 7173 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7174 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7175 7176 cp = 0; 7177 switch (ptype) { 7178 case MATPRODUCT_AB: /* A * P */ 7179 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7180 7181 /* A_diag * P_local (merged or not) */ 7182 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7183 /* P is product->B */ 7184 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7185 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7186 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7187 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7188 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7189 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7190 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7191 mp[cp]->product->api_user = product->api_user; 7192 PetscCall(MatProductSetFromOptions(mp[cp])); 7193 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7194 PetscCall(ISGetIndices(glob, &globidx)); 7195 rmapt[cp] = 1; 7196 cmapt[cp] = 2; 7197 cmapa[cp] = globidx; 7198 mptmp[cp] = PETSC_FALSE; 7199 cp++; 7200 } else { /* A_diag * P_diag and A_diag * P_off */ 7201 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7202 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7203 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7204 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7205 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7206 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7207 mp[cp]->product->api_user = product->api_user; 7208 PetscCall(MatProductSetFromOptions(mp[cp])); 7209 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7210 rmapt[cp] = 1; 7211 cmapt[cp] = 1; 7212 mptmp[cp] = PETSC_FALSE; 7213 cp++; 7214 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7215 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7216 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7217 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7218 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7219 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7220 mp[cp]->product->api_user = product->api_user; 7221 PetscCall(MatProductSetFromOptions(mp[cp])); 7222 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7223 rmapt[cp] = 1; 7224 cmapt[cp] = 2; 7225 cmapa[cp] = p->garray; 7226 mptmp[cp] = PETSC_FALSE; 7227 cp++; 7228 } 7229 7230 /* A_off * P_other */ 7231 if (mmdata->P_oth) { 7232 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7233 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7234 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7235 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7236 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7237 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7238 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7239 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7240 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7241 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7242 mp[cp]->product->api_user = product->api_user; 7243 PetscCall(MatProductSetFromOptions(mp[cp])); 7244 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7245 rmapt[cp] = 1; 7246 cmapt[cp] = 2; 7247 cmapa[cp] = P_oth_idx; 7248 mptmp[cp] = PETSC_FALSE; 7249 cp++; 7250 } 7251 break; 7252 7253 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7254 /* A is product->B */ 7255 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7256 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7257 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7258 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7259 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7260 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7261 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7262 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7263 mp[cp]->product->api_user = product->api_user; 7264 PetscCall(MatProductSetFromOptions(mp[cp])); 7265 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7266 PetscCall(ISGetIndices(glob, &globidx)); 7267 rmapt[cp] = 2; 7268 rmapa[cp] = globidx; 7269 cmapt[cp] = 2; 7270 cmapa[cp] = globidx; 7271 mptmp[cp] = PETSC_FALSE; 7272 cp++; 7273 } else { 7274 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7275 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7276 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7277 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7278 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7279 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7280 mp[cp]->product->api_user = product->api_user; 7281 PetscCall(MatProductSetFromOptions(mp[cp])); 7282 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7283 PetscCall(ISGetIndices(glob, &globidx)); 7284 rmapt[cp] = 1; 7285 cmapt[cp] = 2; 7286 cmapa[cp] = globidx; 7287 mptmp[cp] = PETSC_FALSE; 7288 cp++; 7289 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7290 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7291 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7292 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7293 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7294 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7295 mp[cp]->product->api_user = product->api_user; 7296 PetscCall(MatProductSetFromOptions(mp[cp])); 7297 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7298 rmapt[cp] = 2; 7299 rmapa[cp] = p->garray; 
7300 cmapt[cp] = 2; 7301 cmapa[cp] = globidx; 7302 mptmp[cp] = PETSC_FALSE; 7303 cp++; 7304 } 7305 break; 7306 case MATPRODUCT_PtAP: 7307 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7308 /* P is product->B */ 7309 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7310 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7311 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7312 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7313 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7314 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7315 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7316 mp[cp]->product->api_user = product->api_user; 7317 PetscCall(MatProductSetFromOptions(mp[cp])); 7318 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7319 PetscCall(ISGetIndices(glob, &globidx)); 7320 rmapt[cp] = 2; 7321 rmapa[cp] = globidx; 7322 cmapt[cp] = 2; 7323 cmapa[cp] = globidx; 7324 mptmp[cp] = PETSC_FALSE; 7325 cp++; 7326 if (mmdata->P_oth) { 7327 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7328 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7329 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7330 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7331 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7332 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7333 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7334 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7335 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7336 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7337 mp[cp]->product->api_user = product->api_user; 7338 PetscCall(MatProductSetFromOptions(mp[cp])); 7339 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7340 
mptmp[cp] = PETSC_TRUE; 7341 cp++; 7342 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7343 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7344 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7345 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7346 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7347 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7348 mp[cp]->product->api_user = product->api_user; 7349 PetscCall(MatProductSetFromOptions(mp[cp])); 7350 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7351 rmapt[cp] = 2; 7352 rmapa[cp] = globidx; 7353 cmapt[cp] = 2; 7354 cmapa[cp] = P_oth_idx; 7355 mptmp[cp] = PETSC_FALSE; 7356 cp++; 7357 } 7358 break; 7359 default: 7360 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7361 } 7362 /* sanity check */ 7363 if (size > 1) 7364 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7365 7366 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7367 for (i = 0; i < cp; i++) { 7368 mmdata->mp[i] = mp[i]; 7369 mmdata->mptmp[i] = mptmp[i]; 7370 } 7371 mmdata->cp = cp; 7372 C->product->data = mmdata; 7373 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7374 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7375 7376 /* memory type */ 7377 mmdata->mtype = PETSC_MEMTYPE_HOST; 7378 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7379 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7380 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7381 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7382 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7383 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7384 
7385 /* prepare coo coordinates for values insertion */ 7386 7387 /* count total nonzeros of those intermediate seqaij Mats 7388 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7389 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7390 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7391 */ 7392 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7393 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7394 if (mptmp[cp]) continue; 7395 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7396 const PetscInt *rmap = rmapa[cp]; 7397 const PetscInt mr = mp[cp]->rmap->n; 7398 const PetscInt rs = C->rmap->rstart; 7399 const PetscInt re = C->rmap->rend; 7400 const PetscInt *ii = mm->i; 7401 for (i = 0; i < mr; i++) { 7402 const PetscInt gr = rmap[i]; 7403 const PetscInt nz = ii[i + 1] - ii[i]; 7404 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7405 else ncoo_oown += nz; /* this row is local */ 7406 } 7407 } else ncoo_d += mm->nz; 7408 } 7409 7410 /* 7411 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7412 7413 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7414 7415 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7416 7417 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7418 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7419 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7420 7421 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7422 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7423 */ 7424 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7425 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7426 7427 /* gather (i,j) of nonzeros inserted by remote procs */ 7428 if (hasoffproc) { 7429 PetscSF msf; 7430 PetscInt ncoo2, *coo_i2, *coo_j2; 7431 7432 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7433 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7434 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7435 7436 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7437 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7438 PetscInt *idxoff = mmdata->off[cp]; 7439 PetscInt *idxown = mmdata->own[cp]; 7440 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7441 const PetscInt *rmap = rmapa[cp]; 7442 const PetscInt *cmap = cmapa[cp]; 7443 const PetscInt *ii = mm->i; 7444 PetscInt *coi = coo_i + ncoo_o; 7445 PetscInt *coj = coo_j + ncoo_o; 7446 const PetscInt mr = mp[cp]->rmap->n; 7447 const PetscInt rs = C->rmap->rstart; 7448 const PetscInt re = C->rmap->rend; 7449 const PetscInt cs = C->cmap->rstart; 7450 for (i = 0; i < mr; i++) { 7451 const PetscInt *jj = mm->j + ii[i]; 7452 const PetscInt gr = rmap[i]; 7453 const PetscInt nz = ii[i + 1] - ii[i]; 7454 if (gr < rs || gr >= re) { /* this is an offproc row */ 7455 for (j = ii[i]; j < ii[i + 1]; j++) { 7456 *coi++ = gr; 7457 *idxoff++ = j; 7458 } 7459 if (!cmapt[cp]) { /* already global */ 7460 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7461 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7462 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7463 } else { /* offdiag */ 7464 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7465 } 7466 ncoo_o += nz; 7467 } else { /* this is a local row */ 7468 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7469 } 7470 } 7471 } 7472 mmdata->off[cp + 1] = idxoff; 7473 mmdata->own[cp + 1] = idxown; 7474 } 7475 7476 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7477 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7478 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7479 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7480 ncoo = ncoo_d + ncoo_oown + ncoo2; 7481 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7482 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7483 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7484 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7485 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7486 PetscCall(PetscFree2(coo_i, coo_j)); 7487 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7488 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7489 coo_i = coo_i2; 7490 coo_j = coo_j2; 7491 } else { /* no offproc values insertion */ 7492 ncoo = ncoo_d; 7493 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7494 7495 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7496 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7497 PetscCall(PetscSFSetUp(mmdata->sf)); 7498 } 7499 mmdata->hasoffproc = hasoffproc; 7500 7501 /* gather (i,j) of nonzeros inserted locally */ 7502 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7503 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7504 PetscInt *coi = coo_i + ncoo_d; 7505 PetscInt *coj = coo_j + ncoo_d; 7506 const PetscInt *jj = mm->j; 7507 const PetscInt *ii = mm->i; 7508 const PetscInt *cmap = 
cmapa[cp]; 7509 const PetscInt *rmap = rmapa[cp]; 7510 const PetscInt mr = mp[cp]->rmap->n; 7511 const PetscInt rs = C->rmap->rstart; 7512 const PetscInt re = C->rmap->rend; 7513 const PetscInt cs = C->cmap->rstart; 7514 7515 if (mptmp[cp]) continue; 7516 if (rmapt[cp] == 1) { /* consecutive rows */ 7517 /* fill coo_i */ 7518 for (i = 0; i < mr; i++) { 7519 const PetscInt gr = i + rs; 7520 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7521 } 7522 /* fill coo_j */ 7523 if (!cmapt[cp]) { /* type-0, already global */ 7524 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7525 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7526 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7527 } else { /* type-2, local to global for sparse columns */ 7528 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7529 } 7530 ncoo_d += mm->nz; 7531 } else if (rmapt[cp] == 2) { /* sparse rows */ 7532 for (i = 0; i < mr; i++) { 7533 const PetscInt *jj = mm->j + ii[i]; 7534 const PetscInt gr = rmap[i]; 7535 const PetscInt nz = ii[i + 1] - ii[i]; 7536 if (gr >= rs && gr < re) { /* local rows */ 7537 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7538 if (!cmapt[cp]) { /* type-0, already global */ 7539 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7540 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7541 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7542 } else { /* type-2, local to global for sparse columns */ 7543 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7544 } 7545 ncoo_d += nz; 7546 } 7547 } 7548 } 7549 } 7550 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7551 PetscCall(ISDestroy(&glob)); 7552 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7553 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7554 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7555 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatProductSetFromOptions_MPIAIJBACKEND - select the COO-based backend symbolic implementation
   for AB, AtB and PtAP products, unless the user asked for the CPU fallback through options.

   With device support enabled, the backend is considered only when neither operand is bound to
   the CPU and B has the same type as A; in all other cases (or when a *_backend_cpu option is
   set) the plain MPIAIJ product implementation is used instead.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* eligible for the device backend? decided below */
  PetscBool usecpu = PETSC_FALSE; /* user-requested CPU fallback */
#else
  PetscBool match = PETSC_TRUE; /* no device support: backend always applicable */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* option names depend on whether the user called the old API (MatMatMult etc.) or MatProduct */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)

   Relies on MatGetRow() returning the column indices in increasing order, so equal
   block indices are always adjacent. An empty row yields *n = 0 (cnt stays -1).
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      /* append a new block index only when it differs from the previous one */
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

   ncollapsed - the number of block indices
   collapsed - the block indices (must be large enough to contain the indices)

   w0/w1/w2 are caller-provided workspaces; the merge result ping-pongs between them,
   so *collapsed (when requested) points into one of these workspaces, not fresh memory.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap buffers: this iteration's merged output becomes the next iteration's "previous" input */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatFilter_AIJ - create filteredG containing only the entries of Gmat whose absolute value
   (of the real part) exceeds vfilter; kept entries retain their original values.

   This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
*/
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this
     operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    /* MPIAIJ: work on the diagonal (A) and off-diagonal (B) sequential blocks separately */
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a      = d->A;
    b      = d->B;
    garray = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols;
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  /* NOTE(review): in the sequential case o_nnz is never written; presumably
     MatMPIAIJSetPreallocation() is then a no-op for a SeqAIJ tGmat — confirm */
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));
  /* fill tGmat one (global) row at a time; nnz0 counts scanned entries, nnz1 kept entries */
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0;
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) { /* first pass diagonal block, second pass (if any) off-diagonal block */
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        /* NOTE(review): sv holds a magnitude in a PetscScalar, so the PetscRealPart() in the test below is redundant */
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag
          if (c != a) cid = garray[idx[jj]]; /* off-diagonal block: map local column to global via garray */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameters:
+  Amat - matrix
.  symmetrize - make the result symmetric
.  scale - scale with diagonal
-  filter - filtering threshold; presumably a negative value disables filtering - TODO confirm

   Output Parameter:
.
a_Gmat - output scalar graph >= 0 7788 7789 */ 7790 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7791 { 7792 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7793 MPI_Comm comm; 7794 Mat Gmat; 7795 PetscBool ismpiaij, isseqaij; 7796 Mat a, b, c; 7797 MatType jtype; 7798 7799 PetscFunctionBegin; 7800 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7801 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7802 PetscCall(MatGetSize(Amat, &MM, &NN)); 7803 PetscCall(MatGetBlockSize(Amat, &bs)); 7804 nloc = (Iend - Istart) / bs; 7805 7806 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7807 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7808 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7809 7810 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7811 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7812 implementation */ 7813 if (bs > 1) { 7814 PetscCall(MatGetType(Amat, &jtype)); 7815 PetscCall(MatCreate(comm, &Gmat)); 7816 PetscCall(MatSetType(Gmat, jtype)); 7817 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7818 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7819 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7820 PetscInt *d_nnz, *o_nnz; 7821 MatScalar *aa, val, *AA; 7822 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7823 if (isseqaij) { 7824 a = Amat; 7825 b = NULL; 7826 } else { 7827 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7828 a = d->A; 7829 b = d->B; 7830 } 7831 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7832 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7833 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7834 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7835 const PetscInt *cols1, *cols2; 7836 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7837 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7838 nnz[brow / bs] = nc2 / bs; 7839 if (nc2 % bs) ok = 0; 7840 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7841 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7842 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7843 if (nc1 != nc2) ok = 0; 7844 else { 7845 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7846 if (cols1[jj] != cols2[jj]) ok = 0; 7847 if (cols1[jj] % bs != jj % bs) ok = 0; 7848 } 7849 } 7850 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7851 } 7852 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7853 if (!ok) { 7854 PetscCall(PetscFree2(d_nnz, o_nnz)); 7855 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7856 goto old_bs; 7857 } 7858 } 7859 } 7860 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7861 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7862 PetscCall(PetscFree2(d_nnz, o_nnz)); 7863 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7864 // diag 7865 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7866 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7867 ai = aseq->i; 7868 n = ai[brow + 1] - ai[brow]; 7869 aj = aseq->j + ai[brow]; 7870 for (int k = 0; k < n; k += bs) { // block columns 7871 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7872 val = 0; 7873 for (int ii = 0; ii < bs; ii++) { // rows in block 7874 aa = aseq->a + ai[brow + ii] + k; 7875 for (int jj = 0; jj < bs; jj++) { // columns in block 7876 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7877 } 7878 } 7879 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7880 AA[k / bs] = val; 7881 } 7882 grow = Istart / bs + brow / bs; 7883 PetscCall(MatSetValues(Gmat, 1, 
&grow, n / bs, AJ, AA, INSERT_VALUES)); 7884 } 7885 // off-diag 7886 if (ismpiaij) { 7887 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7888 const PetscScalar *vals; 7889 const PetscInt *cols, *garray = aij->garray; 7890 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7891 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7892 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7893 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7894 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7895 AA[k / bs] = 0; 7896 AJ[cidx] = garray[cols[k]] / bs; 7897 } 7898 nc = ncols / bs; 7899 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7900 for (int ii = 0; ii < bs; ii++) { // rows in block 7901 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7902 for (int k = 0; k < ncols; k += bs) { 7903 for (int jj = 0; jj < bs; jj++) { // cols in block 7904 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7905 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7906 } 7907 } 7908 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7909 } 7910 grow = Istart / bs + brow / bs; 7911 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7912 } 7913 } 7914 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7915 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7916 PetscCall(PetscFree2(AA, AJ)); 7917 } else { 7918 const PetscScalar *vals; 7919 const PetscInt *idx; 7920 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7921 old_bs: 7922 /* 7923 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7924 */ 7925 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7926 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7927 if (isseqaij) { 7928 PetscInt max_d_nnz; 7929 /* 7930 Determine exact preallocation count for (sequential) scalar matrix 7931 */ 7932 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7933 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7934 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7935 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7936 PetscCall(PetscFree3(w0, w1, w2)); 7937 } else if (ismpiaij) { 7938 Mat Daij, Oaij; 7939 const PetscInt *garray; 7940 PetscInt max_d_nnz; 7941 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7942 /* 7943 Determine exact preallocation count for diagonal block portion of scalar matrix 7944 */ 7945 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7946 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7947 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7948 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7949 PetscCall(PetscFree3(w0, w1, w2)); 7950 /* 7951 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7952 */ 7953 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7954 o_nnz[jj] = 0; 7955 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7956 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7957 o_nnz[jj] += ncols; 7958 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7959 } 7960 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7961 } 7962 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7963 /* get scalar copy (norms) of matrix */ 7964 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7965 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7966 PetscCall(PetscFree2(d_nnz, o_nnz)); 7967 for (Ii = Istart; Ii < Iend; Ii++) { 7968 
PetscInt dest_row = Ii / bs; 7969 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7970 for (jj = 0; jj < ncols; jj++) { 7971 PetscInt dest_col = idx[jj] / bs; 7972 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7973 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7974 } 7975 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7976 } 7977 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7978 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7979 } 7980 } else { 7981 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7982 else { 7983 Gmat = Amat; 7984 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7985 } 7986 if (isseqaij) { 7987 a = Gmat; 7988 b = NULL; 7989 } else { 7990 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7991 a = d->A; 7992 b = d->B; 7993 } 7994 if (filter >= 0 || scale) { 7995 /* take absolute value of each entry */ 7996 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7997 MatInfo info; 7998 PetscScalar *avals; 7999 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8000 PetscCall(MatSeqAIJGetArray(c, &avals)); 8001 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8002 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8003 } 8004 } 8005 } 8006 if (symmetrize) { 8007 PetscBool isset, issym; 8008 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8009 if (!isset || !issym) { 8010 Mat matTrans; 8011 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8012 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8013 PetscCall(MatDestroy(&matTrans)); 8014 } 8015 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8016 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8017 if (scale) { 8018 /* scale c for all diagonal values = 1 or -1 */ 8019 Vec diag; 8020 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8021 PetscCall(MatGetDiagonal(Gmat, diag)); 8022 PetscCall(VecReciprocal(diag)); 8023 PetscCall(VecSqrtAbs(diag)); 8024 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8025 PetscCall(VecDestroy(&diag)); 8026 } 8027 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8028 8029 if (filter >= 0) { 8030 Mat Fmat = NULL; /* some silly compiler needs this */ 8031 8032 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8033 PetscCall(MatDestroy(&Gmat)); 8034 Gmat = Fmat; 8035 } 8036 *a_Gmat = Gmat; 8037 PetscFunctionReturn(PETSC_SUCCESS); 8038 } 8039 8040 /* 8041 Special version for direct calls from Fortran 8042 */ 8043 #include <petsc/private/fortranimpl.h> 8044 8045 /* Change these macros so can be used in void function */ 8046 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8047 #undef PetscCall 8048 #define PetscCall(...) \ 8049 do { \ 8050 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8051 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8052 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8053 return; \ 8054 } \ 8055 } while (0) 8056 8057 #undef SETERRQ 8058 #define SETERRQ(comm, ierr, ...) 
\ 8059 do { \ 8060 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8061 return; \ 8062 } while (0) 8063 8064 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8065 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8066 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8067 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8068 #else 8069 #endif 8070 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8071 { 8072 Mat mat = *mmat; 8073 PetscInt m = *mm, n = *mn; 8074 InsertMode addv = *maddv; 8075 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8076 PetscScalar value; 8077 8078 MatCheckPreallocated(mat, 1); 8079 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8080 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8081 { 8082 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8083 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8084 PetscBool roworiented = aij->roworiented; 8085 8086 /* Some Variables required in the macro */ 8087 Mat A = aij->A; 8088 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8089 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8090 MatScalar *aa; 8091 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8092 Mat B = aij->B; 8093 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8094 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8095 MatScalar *ba; 8096 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8097 * cannot use "#if defined" inside a macro. 
*/ 8098 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8099 8100 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8101 PetscInt nonew = a->nonew; 8102 MatScalar *ap1, *ap2; 8103 8104 PetscFunctionBegin; 8105 PetscCall(MatSeqAIJGetArray(A, &aa)); 8106 PetscCall(MatSeqAIJGetArray(B, &ba)); 8107 for (i = 0; i < m; i++) { 8108 if (im[i] < 0) continue; 8109 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8110 if (im[i] >= rstart && im[i] < rend) { 8111 row = im[i] - rstart; 8112 lastcol1 = -1; 8113 rp1 = aj + ai[row]; 8114 ap1 = aa + ai[row]; 8115 rmax1 = aimax[row]; 8116 nrow1 = ailen[row]; 8117 low1 = 0; 8118 high1 = nrow1; 8119 lastcol2 = -1; 8120 rp2 = bj + bi[row]; 8121 ap2 = ba + bi[row]; 8122 rmax2 = bimax[row]; 8123 nrow2 = bilen[row]; 8124 low2 = 0; 8125 high2 = nrow2; 8126 8127 for (j = 0; j < n; j++) { 8128 if (roworiented) value = v[i * n + j]; 8129 else value = v[i + j * m]; 8130 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8131 if (in[j] >= cstart && in[j] < cend) { 8132 col = in[j] - cstart; 8133 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8134 } else if (in[j] < 0) continue; 8135 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8136 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8137 } else { 8138 if (mat->was_assembled) { 8139 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8140 #if defined(PETSC_USE_CTABLE) 8141 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8142 col--; 8143 #else 8144 col = aij->colmap[in[j]] - 1; 8145 #endif 8146 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8147 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8148 col = in[j]; 8149 /* Reinitialize the 
variables required by MatSetValues_SeqAIJ_B_Private() */ 8150 B = aij->B; 8151 b = (Mat_SeqAIJ *)B->data; 8152 bimax = b->imax; 8153 bi = b->i; 8154 bilen = b->ilen; 8155 bj = b->j; 8156 rp2 = bj + bi[row]; 8157 ap2 = ba + bi[row]; 8158 rmax2 = bimax[row]; 8159 nrow2 = bilen[row]; 8160 low2 = 0; 8161 high2 = nrow2; 8162 bm = aij->B->rmap->n; 8163 ba = b->a; 8164 inserted = PETSC_FALSE; 8165 } 8166 } else col = in[j]; 8167 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8168 } 8169 } 8170 } else if (!aij->donotstash) { 8171 if (roworiented) { 8172 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8173 } else { 8174 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8175 } 8176 } 8177 } 8178 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8179 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8180 } 8181 PetscFunctionReturnVoid(); 8182 } 8183 8184 /* Undefining these here since they were redefined from their original definition above! No 8185 * other PETSc functions should be defined past this point, as it is impossible to recover the 8186 * original definitions */ 8187 #undef PetscCall 8188 #undef SETERRQ 8189