#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 114 115 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 116 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 117 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 118 for communicators controlling multiple processes. It is recommended that you call both of 119 the above preallocation routines for simplicity. 120 121 Options Database Key: 122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 123 124 Developer Note: 125 Level: beginner 126 127 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 128 enough exist. 129 130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 131 M*/ 132 133 /*MC 134 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 135 136 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 137 and `MATMPIAIJCRL` otherwise. 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * It may seem a little odd for a MatBindToCPU() call to do this, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
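/*
   Note on MatGetColumnReductions_MPIAIJ() above: each process accumulates its
   local contribution to every global column in the dense work[] array (indexed
   via A->cmap->rstart for the diagonal block and via garray[] for the
   off-diagonal block), and a single MPIU_SUM or MPIU_MAX reduction combines
   them. For NORM_2, for instance, the result per global column j is

     reductions[j] = sqrt( sum_i |a_ij|^2 )

   with the sum taken over all rows on all processes; the MEAN variants divide
   by the global row count m afterwards.
*/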
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash table cost; without it, it is not scalable (each
  process stores an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
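/*
   Sketch of how the colmap built above is queried (it mirrors the lookups in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; gcol stands for a
   hypothetical global column index). Both variants store local index + 1 so
   that 0 can mean "not present":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;                        (becomes -1 if gcol is not an off-diagonal column)
   #else
     lcol = aij->colmap[gcol] - 1;  (dense array of length cmap->N)
   #endif
*/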
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a   = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A                 = aij->A;
  Mat_SeqAIJ *a                 = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj ? aj + ai[row] : NULL;
      ap1      = aa ? aa + ai[row] : NULL;
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj ? bj + bi[row] : NULL;
      ap2      = ba ? ba + bi[row] : NULL;
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v ? v + i * n : NULL, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v ? v + i : NULL, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa and ba might have been freed by a reallocation above, but we do not access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am          = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
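/*
   Worked example for the diagonal/off-diagonal split performed above (the
   numbers are chosen purely for illustration): with cstart = 3 and cend = 6,
   a row with global columns {1, 3, 5, 7} is split into diagonal-block columns
   {0, 2} (each global index shifted by cstart) and off-diagonal columns kept
   global as {1, 7}; ailen and bilen then record two entries each for this row.
*/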
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
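/*
   The stash machinery above is what makes the standard assembly pattern work:
   any process may set entries in rows it does not own, and the values migrate
   to the owning process during assembly. A minimal sketch (the row and column
   indices are assumptions for illustration):

     PetscScalar v   = 1.0;
     PetscInt    row = 0, col = 0;  possibly owned by another process

     PetscCall(MatSetValues(A, 1, &row, 1, &col, &v, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));  starts the stash scatter
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));    receives and applies stashed values
*/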
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or noncongruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
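/*
   Typical use of the row-zeroing above is imposing Dirichlet boundary
   conditions: with a vector x holding the boundary values and a right-hand
   side b, the routine sets b[r] = diag * x[r] for each zeroed row r, so the
   remaining row equation diag * x[r] = b[r] reproduces the boundary value.
   Sketch (rows[] is a hypothetical list of boundary rows):

     PetscInt    rows[] = {0, 10};
     PetscScalar diag   = 1.0;

     PetscCall(MatZeroRows(A, 2, rows, diag, x, b));
*/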
PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 943 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 944 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 945 } 946 rrows[r].rank = p; 947 rrows[r].index = rows[r] - owners[p]; 948 } 949 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 950 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 951 /* Collect flags for rows to be zeroed */ 952 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 953 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 954 PetscCall(PetscSFDestroy(&sf)); 955 /* Compress and put in row numbers */ 956 for (r = 0; r < n; ++r) 957 if (lrows[r] >= 0) lrows[len++] = r; 958 /* zero diagonal part of matrix */ 959 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 960 /* handle off diagonal part of matrix */ 961 PetscCall(MatCreateVecs(A, &xmask, NULL)); 962 PetscCall(VecDuplicate(l->lvec, &lmask)); 963 PetscCall(VecGetArray(xmask, &bb)); 964 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 965 PetscCall(VecRestoreArray(xmask, &bb)); 966 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 967 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 968 PetscCall(VecDestroy(&xmask)); 969 if (x && b) { /* this code is buggy when the row and column layout don't match */ 970 PetscBool cong; 971 972 PetscCall(MatHasCongruentLayouts(A, &cong)); 973 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 974 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 975 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 976 PetscCall(VecGetArrayRead(l->lvec, &xx)); 977 PetscCall(VecGetArray(b, &bb)); 978 } 979 PetscCall(VecGetArray(lmask, &mask)); 980 /* remove zeroed rows of off diagonal matrix */ 981 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 982 ii = aij->i; 983 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); 984 /* loop over all elements of off process part of matrix zeroing removed columns*/ 985 if (aij->compressedrow.use) { 986 m = aij->compressedrow.nrows; 987 ii = aij->compressedrow.i; 988 ridx = aij->compressedrow.rindex; 989 for (i = 0; i < m; i++) { 990 n = ii[i + 1] - ii[i]; 991 aj = aij->j + ii[i]; 992 aa = aij_a + ii[i]; 993 994 for (j = 0; j < n; j++) { 995 if (PetscAbsScalar(mask[*aj])) { 996 if (b) bb[*ridx] -= *aa * xx[*aj]; 997 *aa = 0.0; 998 } 999 aa++; 1000 aj++; 1001 } 1002 ridx++; 1003 } 1004 } else { /* do not use compressed row format */ 1005 m = l->B->rmap->n; 1006 for (i = 0; i < m; i++) { 1007 n = ii[i + 1] - ii[i]; 1008 aj = aij->j + ii[i]; 1009 aa = aij_a + ii[i]; 1010 for (j = 0; j < n; j++) { 1011 if (PetscAbsScalar(mask[*aj])) { 1012 if (b) bb[i] -= *aa * xx[*aj]; 1013 *aa = 0.0; 1014 } 1015 aa++; 1016 aj++; 1017 } 1018 } 1019 } 1020 if (x && b) { 1021 PetscCall(VecRestoreArray(b, &bb)); 1022 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1023 } 1024 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1025 PetscCall(VecRestoreArray(lmask, &mask)); 1026 PetscCall(VecDestroy(&lmask)); 1027 PetscCall(PetscFree(lrows)); 1028 1029 /* only change matrix nonzero state if pattern was allowed to be changed */ 1030 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1031 PetscObjectState state = l->A->nonzerostate + 
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}
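/*
   MatMult_MPIAIJ() above implements the classic overlap of communication and
   computation for y = A x with the local storage split into a diagonal block
   and an off-diagonal block:

     y_local = A_diag * x_local + B_offdiag * x_ghost

   The scatter that fills the ghost values in lvec is started, the purely
   local product with a->A proceeds while messages are in flight, and only
   then is the scatter completed and the off-diagonal contribution added. The
   transpose variants below use the same scatter in SCATTER_REVERSE with
   ADD_VALUES to return ghost contributions to their owning processes.
*/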
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
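/*
   The writer above emits the standard PETSc binary format: a header
   [MAT_FILE_CLASSID, M, N, nnz], then all row lengths, then all global column
   indices, then all values. A matrix written this way can be read back with
   MatLoad(). Sketch (the file name is an assumption for the example):

     PetscViewer bviewer;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_WRITE, &bviewer));
     PetscCall(MatView(mat, bviewer));
     PetscCall(PetscViewerDestroy(&bviewer));
*/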
#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }
  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
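/*
   A minimal usage sketch of the viewers handled above (assuming the standard
   PetscViewer command-line format keywords):

     -mat_view                       ASCII view of the matrix entries
     -mat_view ::ascii_info_detail   per-rank nonzero counts and I-node usage
     -mat_view ::load_balance        min/avg/max nonzero counts across ranks
     -mat_view draw                  nonzero pattern drawn in a graphics window
*/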
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
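  /* A small example of what the reduction below computes: rdest[i] becomes the
     new global row index of local row i, i.e. the inverse of rowp. On a single
     rank with rowp = {2,0,1} (so permuted row 1 is old row 0), rdest = {1,2,0}. */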
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
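/*
   A minimal usage sketch for the MAT_GLOBAL_SUM path above (the user matrix A
   is hypothetical):

     MatInfo info;
     PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "total nonzeros used: %g\n", (double)info.nz_used));
*/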
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
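/*
   For example, if row i of X has (global) columns {0,3} and row i of Y has
   {1,3,5}, the merge below counts the union {0,1,3,5} and sets nnz[i] = 4.
*/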
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so we already KNOW the minimum of |values| is 0.0 (implicit zeros) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
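      /* Example, assuming cstart = 2 and n = 2 (so the diagonal block owns
         global columns 2 and 3): if this row has off-diagonal entries in global
         columns {0,4}, the scan below stops at j = 1 because global column 1 is
         the first off-diagonal column holding an implicit 0.0. */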
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower (implicit zeros) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}
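/*
   A minimal usage sketch (assuming A is an assembled MATMPIAIJ):

     PetscCount nz;
     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
     PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %" PetscInt64_FMT "\n", (PetscInt64)nz));
*/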
/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
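/*
   A minimal usage sketch: the scalable overlap algorithm can be selected either
   programmatically,

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));

   or from the command line with -mat_increase_overlap_scalable 1, which is read
   in MatSetFromOptions_MPIAIJ() above.
*/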
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
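  /* Until assembly the off-diagonal block b->B is given the full global column
     width (or width 0 when there is a single process); at assembly time its
     columns are compressed to just the ones actually used, recorded in
     b->garray (see the comment in MatCopy_MPIAIJ() above). */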
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3004 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3005 *newmat = mat; 3006 PetscFunctionReturn(PETSC_SUCCESS); 3007 } 3008 3009 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3010 { 3011 PetscBool isbinary, ishdf5; 3012 3013 PetscFunctionBegin; 3014 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3015 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3016 /* force binary viewer to load .info file if it has not yet done so */ 3017 PetscCall(PetscViewerSetUp(viewer)); 3018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3020 if (isbinary) { 3021 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3022 } else if (ishdf5) { 3023 #if defined(PETSC_HAVE_HDF5) 3024 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3025 #else 3026 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3027 #endif 3028 } else { 3029 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3030 } 3031 PetscFunctionReturn(PETSC_SUCCESS); 3032 } 3033 3034 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3035 { 3036 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3037 PetscInt *rowidxs, *colidxs; 3038 PetscScalar *matvals; 3039 3040 PetscFunctionBegin; 3041 PetscCall(PetscViewerSetUp(viewer)); 3042 3043 /* read in matrix header */ 3044 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3045 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3046 M = header[1]; 3047 N = header[2]; 3048 nz = header[3]; 3049 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3050 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3051 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3052 3053 /* set block sizes from the viewer's .info file */ 3054 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3055 /* set global sizes if not set already */ 3056 if (mat->rmap->N < 0) mat->rmap->N = M; 3057 if (mat->cmap->N < 0) mat->cmap->N = N; 3058 PetscCall(PetscLayoutSetUp(mat->rmap)); 3059 PetscCall(PetscLayoutSetUp(mat->cmap)); 3060 3061 /* check if the matrix sizes are correct */ 3062 PetscCall(MatGetSize(mat, &rows, &cols)); 3063 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3064 3065 /* read in row lengths and build row indices */ 3066 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3067 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3068 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3069 rowidxs[0] = 0; 3070 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3071 if (nz != PETSC_MAX_INT) { 3072 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, 
PetscObjectComm((PetscObject)viewer))); 3073 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3074 } 3075 3076 /* read in column indices and matrix values */ 3077 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3078 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3079 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3080 /* store matrix indices and values */ 3081 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3082 PetscCall(PetscFree(rowidxs)); 3083 PetscCall(PetscFree2(colidxs, matvals)); 3084 PetscFunctionReturn(PETSC_SUCCESS); 3085 } 3086 3087 /* Not scalable because of ISAllGather() unless getting all columns. */ 3088 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3089 { 3090 IS iscol_local; 3091 PetscBool isstride; 3092 PetscMPIInt lisstride = 0, gisstride; 3093 3094 PetscFunctionBegin; 3095 /* check if we are grabbing all columns */ 3096 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3097 3098 if (isstride) { 3099 PetscInt start, len, mstart, mlen; 3100 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3101 PetscCall(ISGetLocalSize(iscol, &len)); 3102 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3103 if (mstart == start && mlen - mstart == len) lisstride = 1; 3104 } 3105 3106 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3107 if (gisstride) { 3108 PetscInt N; 3109 PetscCall(MatGetSize(mat, NULL, &N)); 3110 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3111 PetscCall(ISSetIdentity(iscol_local)); 3112 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3113 } else { 3114 PetscInt cbs; 3115 PetscCall(ISGetBlockSize(iscol, &cbs)); 3116 PetscCall(ISAllGather(iscol, &iscol_local)); 3117 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3118 } 3119 3120 *isseq = iscol_local; 3121 PetscFunctionReturn(PETSC_SUCCESS); 3122 } 3123 3124 /* 3125 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and the global size of iscol_local 3126 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3127 3128 Input Parameters: 3129 + mat - matrix 3130 . isrow - parallel row index set; its local indices are a subset of local rows of `mat`, 3131 i.e., mat->rstart <= isrow[i] < mat->rend 3132 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3133 i.e., mat->cstart <= iscol[i] < mat->cend 3134 3135 Output Parameters: 3136 + isrow_d - sequential row index set for retrieving mat->A 3137 . iscol_d - sequential column index set for retrieving mat->A 3138 .
iscol_o - sequential column index set for retrieving mat->B 3139 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3140 */ 3141 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3142 { 3143 Vec x, cmap; 3144 const PetscInt *is_idx; 3145 PetscScalar *xarray, *cmaparray; 3146 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3148 Mat B = a->B; 3149 Vec lvec = a->lvec, lcmap; 3150 PetscInt i, cstart, cend, Bn = B->cmap->N; 3151 MPI_Comm comm; 3152 VecScatter Mvctx = a->Mvctx; 3153 3154 PetscFunctionBegin; 3155 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3156 PetscCall(ISGetLocalSize(iscol, &ncols)); 3157 3158 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3159 PetscCall(MatCreateVecs(mat, &x, NULL)); 3160 PetscCall(VecSet(x, -1.0)); 3161 PetscCall(VecDuplicate(x, &cmap)); 3162 PetscCall(VecSet(cmap, -1.0)); 3163 3164 /* Get start indices */ 3165 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3166 isstart -= ncols; 3167 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3168 3169 PetscCall(ISGetIndices(iscol, &is_idx)); 3170 PetscCall(VecGetArray(x, &xarray)); 3171 PetscCall(VecGetArray(cmap, &cmaparray)); 3172 PetscCall(PetscMalloc1(ncols, &idx)); 3173 for (i = 0; i < ncols; i++) { 3174 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3175 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3176 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3177 } 3178 PetscCall(VecRestoreArray(x, &xarray)); 3179 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3180 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3181 3182 /* Get iscol_d */ 3183 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3184 PetscCall(ISGetBlockSize(iscol, &i)); 3185 PetscCall(ISSetBlockSize(*iscol_d, i)); 3186 3187 /* Get isrow_d */ 3188 PetscCall(ISGetLocalSize(isrow, &m)); 3189 rstart = mat->rmap->rstart; 3190 PetscCall(PetscMalloc1(m, &idx)); 3191 PetscCall(ISGetIndices(isrow, &is_idx)); 3192 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3193 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3194 3195 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3196 PetscCall(ISGetBlockSize(isrow, &i)); 3197 PetscCall(ISSetBlockSize(*isrow_d, i)); 3198 3199 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3200 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3201 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3202 3203 PetscCall(VecDuplicate(lvec, &lcmap)); 3204 3205 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3206 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3207 3208 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3209 /* off-process column indices */ 3210 count = 0; 3211 PetscCall(PetscMalloc1(Bn, &idx)); 3212 PetscCall(PetscMalloc1(Bn, &cmap1)); 3213 3214 PetscCall(VecGetArray(lvec, &xarray)); 3215 PetscCall(VecGetArray(lcmap, &cmaparray)); 3216 for (i = 0; i < Bn; i++) { 3217 if (PetscRealPart(xarray[i]) > -1.0) { 3218 idx[count] = i; /* local column index in off-diagonal part B */ 3219 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3220 
count++; 3221 } 3222 } 3223 PetscCall(VecRestoreArray(lvec, &xarray)); 3224 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3225 3226 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3227 /* cannot ensure iscol_o has same blocksize as iscol! */ 3228 3229 PetscCall(PetscFree(idx)); 3230 *garray = cmap1; 3231 3232 PetscCall(VecDestroy(&x)); 3233 PetscCall(VecDestroy(&cmap)); 3234 PetscCall(VecDestroy(&lcmap)); 3235 PetscFunctionReturn(PETSC_SUCCESS); 3236 } 3237 3238 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3239 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3240 { 3241 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3242 Mat M = NULL; 3243 MPI_Comm comm; 3244 IS iscol_d, isrow_d, iscol_o; 3245 Mat Asub = NULL, Bsub = NULL; 3246 PetscInt n; 3247 3248 PetscFunctionBegin; 3249 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3250 3251 if (call == MAT_REUSE_MATRIX) { 3252 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3253 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3254 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3255 3256 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3257 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3258 3259 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3260 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3261 3262 /* Update diagonal and off-diagonal portions of submat */ 3263 asub = (Mat_MPIAIJ *)(*submat)->data; 3264 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3265 PetscCall(ISGetLocalSize(iscol_o, &n)); 3266 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3267 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3268 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3269 3270 } else { /* call == MAT_INITIAL_MATRIX) */ 3271 const PetscInt *garray; 3272 PetscInt BsubN; 3273 3274 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3275 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3276 3277 /* Create local submatrices Asub and Bsub */ 3278 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3279 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3280 3281 /* Create submatrix M */ 3282 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3283 3284 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3285 asub = (Mat_MPIAIJ *)M->data; 3286 3287 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3288 n = asub->B->cmap->N; 3289 if (BsubN > n) { 3290 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3291 const PetscInt *idx; 3292 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3293 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3294 3295 PetscCall(PetscMalloc1(n, &idx_new)); 3296 j = 0; 3297 PetscCall(ISGetIndices(iscol_o, &idx)); 3298 for (i = 0; i < n; i++) { 3299 if (j >= BsubN) break; 3300 while (subgarray[i] > garray[j]) j++; 3301 3302 if (subgarray[i] == garray[j]) { 3303 idx_new[i] = idx[j++]; 3304 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3305 } 3306 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3307 3308 PetscCall(ISDestroy(&iscol_o)); 3309 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3310 3311 } else if (BsubN < n) { 3312 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3313 } 3314 3315 PetscCall(PetscFree(garray)); 3316 *submat = M; 3317 3318 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3319 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3320 PetscCall(ISDestroy(&isrow_d)); 3321 3322 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3323 PetscCall(ISDestroy(&iscol_d)); 3324 3325 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3326 PetscCall(ISDestroy(&iscol_o)); 3327 } 3328 PetscFunctionReturn(PETSC_SUCCESS); 3329 } 3330 3331 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3332 { 3333 IS iscol_local = NULL, isrow_d; 3334 PetscInt csize; 3335 PetscInt n, i, j, start, end; 3336 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3337 MPI_Comm comm; 3338 3339 PetscFunctionBegin; 3340 /* If isrow has same processor distribution as mat, 3341 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3342 if (call == MAT_REUSE_MATRIX) { 3343 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3344 if (isrow_d) { 3345 sameRowDist = PETSC_TRUE; 3346 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3347 } else { 3348 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3349 if (iscol_local) { 3350 sameRowDist = PETSC_TRUE; 3351 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3352 } 3353 } 3354 } else { 3355 /* Check if isrow has same processor distribution as mat */ 3356 sameDist[0] = PETSC_FALSE; 3357 
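/* an empty local index set trivially matches; otherwise every local index must lie in this rank's ownership range [start, end) */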
PetscCall(ISGetLocalSize(isrow, &n)); 3358 if (!n) { 3359 sameDist[0] = PETSC_TRUE; 3360 } else { 3361 PetscCall(ISGetMinMax(isrow, &i, &j)); 3362 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3363 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3364 } 3365 3366 /* Check if iscol has same processor distribution as mat */ 3367 sameDist[1] = PETSC_FALSE; 3368 PetscCall(ISGetLocalSize(iscol, &n)); 3369 if (!n) { 3370 sameDist[1] = PETSC_TRUE; 3371 } else { 3372 PetscCall(ISGetMinMax(iscol, &i, &j)); 3373 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3374 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3375 } 3376 3377 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3378 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3379 sameRowDist = tsameDist[0]; 3380 } 3381 3382 if (sameRowDist) { 3383 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3384 /* isrow and iscol have same processor distribution as mat */ 3385 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3386 PetscFunctionReturn(PETSC_SUCCESS); 3387 } else { /* sameRowDist */ 3388 /* isrow has same processor distribution as mat */ 3389 if (call == MAT_INITIAL_MATRIX) { 3390 PetscBool sorted; 3391 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3392 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3393 PetscCall(ISGetSize(iscol, &i)); 3394 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3395 3396 PetscCall(ISSorted(iscol_local, &sorted)); 3397 if (sorted) { 3398 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3399 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3400 PetscFunctionReturn(PETSC_SUCCESS); 3401 } 3402 } else { /* call == MAT_REUSE_MATRIX */ 3403 IS iscol_sub; 3404 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3405 if (iscol_sub) { 3406 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3407 PetscFunctionReturn(PETSC_SUCCESS); 3408 } 3409 } 3410 } 3411 } 3412 3413 /* General case: iscol -> iscol_local which has global size of iscol */ 3414 if (call == MAT_REUSE_MATRIX) { 3415 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3416 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3417 } else { 3418 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3419 } 3420 3421 PetscCall(ISGetLocalSize(iscol, &csize)); 3422 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3423 3424 if (call == MAT_INITIAL_MATRIX) { 3425 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3426 PetscCall(ISDestroy(&iscol_local)); 3427 } 3428 PetscFunctionReturn(PETSC_SUCCESS); 3429 } 3430 3431 /*@C 3432 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3433 and "off-diagonal" part of the matrix in CSR format. 3434 3435 Collective 3436 3437 Input Parameters: 3438 + comm - MPI communicator 3439 . A - "diagonal" portion of matrix 3440 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3441 - garray - global index of `B` columns 3442 3443 Output Parameter: 3444 . mat - the matrix, with input `A` as its local diagonal matrix 3445 3446 Level: advanced 3447 3448 Notes: 3449 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3450 3451 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3452 3453 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3454 @*/ 3455 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3456 { 3457 Mat_MPIAIJ *maij; 3458 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3459 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3460 const PetscScalar *oa; 3461 Mat Bnew; 3462 PetscInt m, n, N; 3463 MatType mpi_mat_type; 3464 3465 PetscFunctionBegin; 3466 PetscCall(MatCreate(comm, mat)); 3467 PetscCall(MatGetSize(A, &m, &n)); 3468 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3469 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3470 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3471 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3472 3473 /* Get global columns of mat */ 3474 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3475 3476 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3477 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3478 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3479 PetscCall(MatSetType(*mat, mpi_mat_type)); 3480 3481 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3482 maij = (Mat_MPIAIJ *)(*mat)->data; 3483 3484 (*mat)->preallocated = PETSC_TRUE; 3485 3486 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3487 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3488 3489 /* Set A as diagonal portion of *mat */ 3490 maij->A = A; 3491 3492 nz = oi[m]; 3493 for (i = 0; i < nz; i++) { 3494 col = oj[i]; 3495 oj[i] = garray[col]; 3496 } 3497 3498 /* Set Bnew as off-diagonal portion of *mat */ 3499 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3500 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3501 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3502 bnew = (Mat_SeqAIJ *)Bnew->data; 3503 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3504 maij->B = Bnew; 3505 3506 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3507 3508 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3509 b->free_a = PETSC_FALSE; 3510 b->free_ij = PETSC_FALSE; 3511 PetscCall(MatDestroy(&B)); 3512 3513 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3514 bnew->free_a = PETSC_TRUE; 3515 bnew->free_ij = PETSC_TRUE; 3516 3517 /* condense columns of maij->B */ 3518 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3519 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3520 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3521 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3522 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3523 PetscFunctionReturn(PETSC_SUCCESS); 3524 } 3525 3526 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3527 3528 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3529 { 3530 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3531 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3532 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3533 Mat M, Msub, B = a->B; 3534 MatScalar *aa; 3535 Mat_SeqAIJ *aij; 3536 PetscInt *garray = a->garray, *colsub, Ncols; 3537 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3538 IS iscol_sub, iscmap; 3539 const PetscInt *is_idx, *cmap; 3540 PetscBool allcolumns = PETSC_FALSE; 3541 MPI_Comm comm; 3542 3543 PetscFunctionBegin; 3544 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3545 if (call == MAT_REUSE_MATRIX) { 3546 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3547 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3548 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3549 3550 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3551 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3552 3553 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3554 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3555 3556 
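/* all cached objects were found above, so the sequential submatrix Msub can be recomputed in place from the saved index sets */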
PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3557 3558 } else { /* call == MAT_INITIAL_MATRIX) */ 3559 PetscBool flg; 3560 3561 PetscCall(ISGetLocalSize(iscol, &n)); 3562 PetscCall(ISGetSize(iscol, &Ncols)); 3563 3564 /* (1) iscol -> nonscalable iscol_local */ 3565 /* Check for special case: each processor gets entire matrix columns */ 3566 PetscCall(ISIdentity(iscol_local, &flg)); 3567 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3568 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3569 if (allcolumns) { 3570 iscol_sub = iscol_local; 3571 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3572 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3573 3574 } else { 3575 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3576 PetscInt *idx, *cmap1, k; 3577 PetscCall(PetscMalloc1(Ncols, &idx)); 3578 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3579 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3580 count = 0; 3581 k = 0; 3582 for (i = 0; i < Ncols; i++) { 3583 j = is_idx[i]; 3584 if (j >= cstart && j < cend) { 3585 /* diagonal part of mat */ 3586 idx[count] = j; 3587 cmap1[count++] = i; /* column index in submat */ 3588 } else if (Bn) { 3589 /* off-diagonal part of mat */ 3590 if (j == garray[k]) { 3591 idx[count] = j; 3592 cmap1[count++] = i; /* column index in submat */ 3593 } else if (j > garray[k]) { 3594 while (j > garray[k] && k < Bn - 1) k++; 3595 if (j == garray[k]) { 3596 idx[count] = j; 3597 cmap1[count++] = i; /* column index in submat */ 3598 } 3599 } 3600 } 3601 } 3602 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3603 3604 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3605 PetscCall(ISGetBlockSize(iscol, &cbs)); 3606 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3607 3608 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3609 } 3610 3611 /* (3) Create sequential Msub */ 3612 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3613 } 3614 3615 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3616 aij = (Mat_SeqAIJ *)(Msub)->data; 3617 ii = aij->i; 3618 PetscCall(ISGetIndices(iscmap, &cmap)); 3619 3620 /* 3621 m - number of local rows 3622 Ncols - number of columns (same on all processors) 3623 rstart - first row in new global matrix generated 3624 */ 3625 PetscCall(MatGetSize(Msub, &m, NULL)); 3626 3627 if (call == MAT_INITIAL_MATRIX) { 3628 /* (4) Create parallel newmat */ 3629 PetscMPIInt rank, size; 3630 PetscInt csize; 3631 3632 PetscCallMPI(MPI_Comm_size(comm, &size)); 3633 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3634 3635 /* 3636 Determine the number of non-zeros in the diagonal and off-diagonal 3637 portions of the matrix in order to do correct preallocation 3638 */ 3639 3640 /* first get start and end of "diagonal" columns */ 3641 PetscCall(ISGetLocalSize(iscol, &csize)); 3642 if (csize == PETSC_DECIDE) { 3643 PetscCall(ISGetSize(isrow, &mglobal)); 3644 if (mglobal == Ncols) { /* square matrix */ 3645 nlocal = m; 3646 } else { 3647 nlocal = Ncols / size + ((Ncols % size) > rank); 3648 } 3649 } else { 3650 nlocal = csize; 3651 } 3652 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3653 rstart = rend - nlocal; 3654 PetscCheck(rank != size - 1 
|| rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3655 3656 /* next, compute all the lengths */ 3657 jj = aij->j; 3658 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3659 olens = dlens + m; 3660 for (i = 0; i < m; i++) { 3661 jend = ii[i + 1] - ii[i]; 3662 olen = 0; 3663 dlen = 0; 3664 for (j = 0; j < jend; j++) { 3665 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3666 else dlen++; 3667 jj++; 3668 } 3669 olens[i] = olen; 3670 dlens[i] = dlen; 3671 } 3672 3673 PetscCall(ISGetBlockSize(isrow, &bs)); 3674 PetscCall(ISGetBlockSize(iscol, &cbs)); 3675 3676 PetscCall(MatCreate(comm, &M)); 3677 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3678 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3679 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3680 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3681 PetscCall(PetscFree(dlens)); 3682 3683 } else { /* call == MAT_REUSE_MATRIX */ 3684 M = *newmat; 3685 PetscCall(MatGetLocalSize(M, &i, NULL)); 3686 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3687 PetscCall(MatZeroEntries(M)); 3688 /* 3689 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3690 rather than the slower MatSetValues(). 3691 */ 3692 M->was_assembled = PETSC_TRUE; 3693 M->assembled = PETSC_FALSE; 3694 } 3695 3696 /* (5) Set values of Msub to *newmat */ 3697 PetscCall(PetscMalloc1(count, &colsub)); 3698 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3699 3700 jj = aij->j; 3701 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3702 for (i = 0; i < m; i++) { 3703 row = rstart + i; 3704 nz = ii[i + 1] - ii[i]; 3705 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3706 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3707 jj += nz; 3708 aa += nz; 3709 } 3710 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3711 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3712 3713 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3714 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3715 3716 PetscCall(PetscFree(colsub)); 3717 3718 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3719 if (call == MAT_INITIAL_MATRIX) { 3720 *newmat = M; 3721 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3722 PetscCall(MatDestroy(&Msub)); 3723 3724 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3725 PetscCall(ISDestroy(&iscol_sub)); 3726 3727 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3728 PetscCall(ISDestroy(&iscmap)); 3729 3730 if (iscol_local) { 3731 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3732 PetscCall(ISDestroy(&iscol_local)); 3733 } 3734 } 3735 PetscFunctionReturn(PETSC_SUCCESS); 3736 } 3737 3738 /* 3739 Not great since it makes two copies of the submatrix, first an SeqAIJ 3740 in local and then by concatenating the local matrices the end result. 3741 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3742 3743 This requires a sequential iscol with all indices. 
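(MatCreateSubMatrix_MPIAIJ() builds such an iscol_local with ISGetSeqIS_Private() before calling this routine.)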
3744 */ 3745 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3746 { 3747 PetscMPIInt rank, size; 3748 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3749 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3750 Mat M, Mreuse; 3751 MatScalar *aa, *vwork; 3752 MPI_Comm comm; 3753 Mat_SeqAIJ *aij; 3754 PetscBool colflag, allcolumns = PETSC_FALSE; 3755 3756 PetscFunctionBegin; 3757 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3758 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3759 PetscCallMPI(MPI_Comm_size(comm, &size)); 3760 3761 /* Check for special case: each processor gets entire matrix columns */ 3762 PetscCall(ISIdentity(iscol, &colflag)); 3763 PetscCall(ISGetLocalSize(iscol, &n)); 3764 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3765 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3766 3767 if (call == MAT_REUSE_MATRIX) { 3768 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3769 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3770 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3771 } else { 3772 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3773 } 3774 3775 /* 3776 m - number of local rows 3777 n - number of columns (same on all processors) 3778 rstart - first row in new global matrix generated 3779 */ 3780 PetscCall(MatGetSize(Mreuse, &m, &n)); 3781 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3782 if (call == MAT_INITIAL_MATRIX) { 3783 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3784 ii = aij->i; 3785 jj = aij->j; 3786 3787 /* 3788 Determine the number of non-zeros in the diagonal and off-diagonal 3789 portions of the matrix in order to do correct preallocation 3790 */ 3791 3792 /* first get start and end of "diagonal" columns */ 3793 if (csize == PETSC_DECIDE) { 3794 PetscCall(ISGetSize(isrow, &mglobal)); 3795 if (mglobal == n) { /* square matrix */ 3796 nlocal = m; 3797 } else { 3798 nlocal = n / size + ((n % size) > rank); 3799 } 3800 } else { 3801 nlocal = csize; 3802 } 3803 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3804 rstart = rend - nlocal; 3805 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3806 3807 /* next, compute all the lengths */ 3808 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3809 olens = dlens + m; 3810 for (i = 0; i < m; i++) { 3811 jend = ii[i + 1] - ii[i]; 3812 olen = 0; 3813 dlen = 0; 3814 for (j = 0; j < jend; j++) { 3815 if (*jj < rstart || *jj >= rend) olen++; 3816 else dlen++; 3817 jj++; 3818 } 3819 olens[i] = olen; 3820 dlens[i] = dlen; 3821 } 3822 PetscCall(MatCreate(comm, &M)); 3823 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3824 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3825 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3826 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3827 PetscCall(PetscFree(dlens)); 3828 } else { 3829 PetscInt ml, nl; 3830 3831 M = *newmat; 3832 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3833 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3834 PetscCall(MatZeroEntries(M)); 3835 /* 3836 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3837 rather than the slower MatSetValues(). 3838 */ 3839 M->was_assembled = PETSC_TRUE; 3840 M->assembled = PETSC_FALSE; 3841 } 3842 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3843 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3844 ii = aij->i; 3845 jj = aij->j; 3846 3847 /* trigger copy to CPU if needed */ 3848 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3849 for (i = 0; i < m; i++) { 3850 row = rstart + i; 3851 nz = ii[i + 1] - ii[i]; 3852 cwork = jj; 3853 jj += nz; 3854 vwork = aa; 3855 aa += nz; 3856 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3857 } 3858 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3859 3860 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3861 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3862 *newmat = M; 3863 3864 /* save submatrix used in processor for next request */ 3865 if (call == MAT_INITIAL_MATRIX) { 3866 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3867 PetscCall(MatDestroy(&Mreuse)); 3868 } 3869 PetscFunctionReturn(PETSC_SUCCESS); 3870 } 3871 3872 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3873 { 3874 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3875 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3876 const PetscInt *JJ; 3877 PetscBool nooffprocentries; 3878 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3879 3880 PetscFunctionBegin; 3881 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3882 3883 PetscCall(PetscLayoutSetUp(B->rmap)); 3884 PetscCall(PetscLayoutSetUp(B->cmap)); 3885 m = B->rmap->n; 3886 cstart = B->cmap->rstart; 3887 cend = B->cmap->rend; 3888 rstart = B->rmap->rstart; 3889 3890 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3891 3892 if (PetscDefined(USE_DEBUG)) { 3893 for (i = 0; i < m; i++) { 3894 nnz = Ii[i + 1] - Ii[i]; 3895 JJ = J ? J + Ii[i] : NULL; 3896 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3897 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3898 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3899 } 3900 } 3901 3902 for (i = 0; i < m; i++) { 3903 nnz = Ii[i + 1] - Ii[i]; 3904 JJ = J ? J + Ii[i] : NULL; 3905 nnz_max = PetscMax(nnz_max, nnz); 3906 d = 0; 3907 for (j = 0; j < nnz; j++) { 3908 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3909 } 3910 d_nnz[i] = d; 3911 o_nnz[i] = nnz - d; 3912 } 3913 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3914 PetscCall(PetscFree2(d_nnz, o_nnz)); 3915 3916 for (i = 0; i < m; i++) { 3917 ii = i + rstart; 3918 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J ? J + Ii[i] : NULL, v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3919 } 3920 nooffprocentries = B->nooffprocentries; 3921 B->nooffprocentries = PETSC_TRUE; 3922 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3923 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3924 B->nooffprocentries = nooffprocentries; 3925 3926 /* count number of entries below block diagonal */ 3927 PetscCall(PetscFree(Aij->ld)); 3928 PetscCall(PetscCalloc1(m, &ld)); 3929 Aij->ld = ld; 3930 for (i = 0; i < m; i++) { 3931 nnz = Ii[i + 1] - Ii[i]; 3932 j = 0; 3933 while (j < nnz && J && J[j] < cstart) j++; /* guard: J may be NULL when no column indices are supplied */ 3934 ld[i] = j; 3935 if (J) J += nnz; 3936 } 3937 3938 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3939 PetscFunctionReturn(PETSC_SUCCESS); 3940 } 3941 3942 /*@ 3943 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3944 (the default parallel PETSc format). 3945 3946 Collective 3947 3948 Input Parameters: 3949 + B - the matrix 3950 . i - the indices into j for the start of each local row (starts with zero) 3951 . j - the column indices for each local row (starts with zero) 3952 - v - optional values in the matrix 3953 3954 Level: developer 3955 3956 Notes: 3957 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3958 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3959 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3960 3961 The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array. 3962 3963 The format used for the sparse matrix input is equivalent to a 3964 row-major ordering, i.e., for the following matrix, the input data expected is 3965 as shown 3966 3967 .vb 3968 1 0 0 3969 2 0 3 P0 3970 ------- 3971 4 5 6 P1 3972 3973 Process0 [P0] rows_owned=[0,1] 3974 i = {0,1,3} [size = nrow+1 = 2+1] 3975 j = {0,0,2} [size = 3] 3976 v = {1,2,3} [size = 3] 3977 3978 Process1 [P1] rows_owned=[2] 3979 i = {0,3} [size = nrow+1 = 1+1] 3980 j = {0,1,2} [size = 3] 3981 v = {4,5,6} [size = 3] 3982 .ve 3983 3984 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 3985 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3986 @*/ 3987 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3988 { 3989 PetscFunctionBegin; 3990 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 3991 PetscFunctionReturn(PETSC_SUCCESS); 3992 } 3993 3994 /*@C 3995 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 3996 (the default parallel PETSc format). For good matrix assembly performance 3997 the user should preallocate the matrix storage by setting the parameters 3998 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 3999 4000 Collective 4001 4002 Input Parameters: 4003 + B - the matrix 4004 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4005 (same value is used for all local rows) 4006 . d_nnz - array containing the number of nonzeros in the various rows of the 4007 DIAGONAL portion of the local submatrix (possibly different for each row) 4008 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4009 The size of this array is equal to the number of local rows, i.e., 'm'.
4010 For matrices that will be factored, you must leave room for (and set) 4011 the diagonal entry even if it is zero. 4012 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4013 submatrix (same value is used for all local rows). 4014 - o_nnz - array containing the number of nonzeros in the various rows of the 4015 OFF-DIAGONAL portion of the local submatrix (possibly different for 4016 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4017 structure. The size of this array is equal to the number 4018 of local rows, i.e., 'm'. 4019 4020 Example Usage: 4021 Consider the following 8x8 matrix with 34 non-zero values that is 4022 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4023 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4024 as follows 4025 4026 .vb 4027 1 2 0 | 0 3 0 | 0 4 4028 Proc0 0 5 6 | 7 0 0 | 8 0 4029 9 0 10 | 11 0 0 | 12 0 4030 ------------------------------------- 4031 13 0 14 | 15 16 17 | 0 0 4032 Proc1 0 18 0 | 19 20 21 | 0 0 4033 0 0 0 | 22 23 0 | 24 0 4034 ------------------------------------- 4035 Proc2 25 26 27 | 0 0 28 | 29 0 4036 30 0 0 | 31 32 33 | 0 34 4037 .ve 4038 4039 This can be represented as a collection of submatrices as 4040 .vb 4041 A B C 4042 D E F 4043 G H I 4044 .ve 4045 4046 Where the submatrices A,B,C are owned by proc0, D,E,F are 4047 owned by proc1, G,H,I are owned by proc2. 4048 4049 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4050 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4051 The 'M','N' parameters are 8,8, and have the same values on all procs. 4052 4053 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4054 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4055 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4056 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4057 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4058 matrix, and [DF] as another `MATSEQAIJ` matrix. 4059 4060 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4061 allocated for every row of the local diagonal submatrix, and `o_nz` 4062 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4063 One way to choose `d_nz` and `o_nz` is to use the maximum nonzeros per local 4064 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4065 In this case, the values of `d_nz`, `o_nz` are 4066 .vb 4067 proc0 d_nz = 2, o_nz = 2 4068 proc1 d_nz = 3, o_nz = 2 4069 proc2 d_nz = 1, o_nz = 4 4070 .ve 4071 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4072 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4073 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4074 34 values. 4075 4076 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4077 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4078 In the above case the values for `d_nnz`, `o_nnz` are 4079 .vb 4080 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4081 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4082 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4083 .ve 4084 Here the space allocated is the sum of all the above values, i.e., 34, and 4085 hence pre-allocation is perfect.
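A minimal call sequence for proc0 of the example above might look as follows (a sketch, not taken from a PETSc example; `comm` is assumed to be the 3-process communicator and `PetscCall()` error checking is omitted):
.vb
  Mat      A;
  PetscInt d_nnz[3] = {2, 2, 2}, o_nnz[3] = {2, 2, 2}; /* proc0's per-row counts listed above */

  MatCreate(comm, &A);
  MatSetSizes(A, 3, 3, 8, 8); /* m, n, M, N from the example */
  MatSetType(A, MATMPIAIJ);
  MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);
  /* ... MatSetValues() for the locally owned rows, then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve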
4086 4087 Level: intermediate 4088 4089 Notes: 4090 If the *_nnz parameter is given then the *_nz parameter is ignored. 4091 4092 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4093 storage. The stored row and column indices begin with zero. 4094 See [Sparse Matrices](sec_matsparse) for details. 4095 4096 The parallel matrix is partitioned such that the first m0 rows belong to 4097 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4098 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 4099 4100 The DIAGONAL portion of the local submatrix of a processor can be defined 4101 as the submatrix which is obtained by extracting the part corresponding to 4102 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4103 first row that belongs to the processor, r2 is the last row belonging to 4104 this processor, and c1-c2 is the range of indices of the local part of a 4105 vector suitable for applying the matrix to. This is an mxn matrix. In the 4106 common case of a square matrix, the row and column ranges are the same and 4107 the DIAGONAL part is also square. The remaining portion of the local 4108 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4109 4110 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4111 4112 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4113 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4114 You can also run with the option `-info` and look for messages with the string 4115 malloc in them to see if additional memory allocation was needed. 4116 4117 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4118 `MatGetInfo()`, `PetscSplitOwnership()` 4119 @*/ 4120 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4121 { 4122 PetscFunctionBegin; 4123 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4124 PetscValidType(B, 1); 4125 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4126 PetscFunctionReturn(PETSC_SUCCESS); 4127 } 4128 4129 /*@ 4130 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4131 CSR format. 4132 4133 Collective 4134 4135 Input Parameters: 4136 + comm - MPI communicator 4137 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4138 . n - This value should be the same as the local size used in creating the 4139 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4140 calculated if N is given) For square matrices n is almost always m. 4141 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4142 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4143 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4144 . j - column indices 4145 - a - optional matrix values 4146 4147 Output Parameter: 4148 .
mat - the matrix 4149 4150 Level: intermediate 4151 4152 Notes: 4153 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4154 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4155 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4156 4157 The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array. 4158 4159 The format used for the sparse matrix input is equivalent to a 4160 row-major ordering, i.e., for the following matrix, the input data expected is 4161 as shown 4162 4163 Once you have created the matrix, you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4164 .vb 4165 1 0 0 4166 2 0 3 P0 4167 ------- 4168 4 5 6 P1 4169 4170 Process0 [P0] rows_owned=[0,1] 4171 i = {0,1,3} [size = nrow+1 = 2+1] 4172 j = {0,0,2} [size = 3] 4173 v = {1,2,3} [size = 3] 4174 4175 Process1 [P1] rows_owned=[2] 4176 i = {0,3} [size = nrow+1 = 1+1] 4177 j = {0,1,2} [size = 3] 4178 v = {4,5,6} [size = 3] 4179 .ve 4180 4181 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4182 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4183 @*/ 4184 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4185 { 4186 PetscFunctionBegin; 4187 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4188 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4189 PetscCall(MatCreate(comm, mat)); 4190 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4191 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4192 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4193 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4194 PetscFunctionReturn(PETSC_SUCCESS); 4195 } 4196 4197 /*@ 4198 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4199 CSR format. Only the numerical values are updated; the other arrays must be identical to those passed 4200 to `MatCreateMPIAIJWithArrays()` 4201 4202 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4203 4204 Collective 4205 4206 Input Parameters: 4207 + mat - the matrix 4208 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4209 . n - This value should be the same as the local size used in creating the 4210 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4211 calculated if N is given) For square matrices n is almost always m. 4212 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4213 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4214 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4215 .
J - column indices 4216 - v - matrix values 4217 4218 Level: deprecated 4219 4220 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4221 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()` 4222 @*/ 4223 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4224 { 4225 PetscInt nnz, i; 4226 PetscBool nooffprocentries; 4227 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4228 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4229 PetscScalar *ad, *ao; 4230 PetscInt ldi, Iii, md; 4231 const PetscInt *Adi = Ad->i; 4232 PetscInt *ld = Aij->ld; 4233 4234 PetscFunctionBegin; 4235 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4236 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4237 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4238 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4239 4240 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4241 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4242 4243 for (i = 0; i < m; i++) { 4244 nnz = Ii[i + 1] - Ii[i]; 4245 Iii = Ii[i]; 4246 ldi = ld[i]; 4247 md = Adi[i + 1] - Adi[i]; 4248 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4249 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4250 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4251 ad += md; 4252 ao += nnz - md; 4253 } 4254 nooffprocentries = mat->nooffprocentries; 4255 mat->nooffprocentries = PETSC_TRUE; 4256 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4257 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4258 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4259 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4260 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4261 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4262 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4263 mat->nooffprocentries = nooffprocentries; 4264 PetscFunctionReturn(PETSC_SUCCESS); 4265 } 4266 4267 /*@ 4268 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4269 4270 Collective 4271 4272 Input Parameters: 4273 + mat - the matrix 4274 - v - matrix values, stored by row 4275 4276 Level: intermediate 4277 4278 Note: 4279 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4280 4281 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4282 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4283 @*/ 4284 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4285 { 4286 PetscInt nnz, i, m; 4287 PetscBool nooffprocentries; 4288 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4289 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4290 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4291 PetscScalar *ad, *ao; 4292 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4293 PetscInt ldi, Iii, md; 4294 PetscInt *ld = Aij->ld; 4295 4296 
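/* The input v[] stores each local row contiguously in global column order: first the ld[i] off-diagonal entries whose global columns lie before the diagonal block, then the diagonal-block entries, then the trailing off-diagonal entries; the loop below splits each row into the diagonal (A) and off-diagonal (B) value arrays accordingly */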
PetscFunctionBegin; 4297 m = mat->rmap->n; 4298 4299 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4300 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4301 Iii = 0; 4302 for (i = 0; i < m; i++) { 4303 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4304 ldi = ld[i]; 4305 md = Adi[i + 1] - Adi[i]; 4306 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4307 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4308 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4309 ad += md; 4310 ao += nnz - md; 4311 Iii += nnz; 4312 } 4313 nooffprocentries = mat->nooffprocentries; 4314 mat->nooffprocentries = PETSC_TRUE; 4315 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4316 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4317 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4318 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4319 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4320 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4321 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4322 mat->nooffprocentries = nooffprocentries; 4323 PetscFunctionReturn(PETSC_SUCCESS); 4324 } 4325 4326 /*@C 4327 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4328 (the default parallel PETSc format). For good matrix assembly performance 4329 the user should preallocate the matrix storage by setting the parameters 4330 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4331 4332 Collective 4333 4334 Input Parameters: 4335 + comm - MPI communicator 4336 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4337 This value should be the same as the local size used in creating the 4338 y vector for the matrix-vector product y = Ax. 4339 . n - This value should be the same as the local size used in creating the 4340 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4341 calculated if N is given) For square matrices n is almost always m. 4342 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4343 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4344 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4345 (same value is used for all local rows) 4346 . d_nnz - array containing the number of nonzeros in the various rows of the 4347 DIAGONAL portion of the local submatrix (possibly different for each row) 4348 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4349 The size of this array is equal to the number of local rows, i.e 'm'. 4350 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4351 submatrix (same value is used for all local rows). 4352 - o_nnz - array containing the number of nonzeros in the various rows of the 4353 OFF-DIAGONAL portion of the local submatrix (possibly different for 4354 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4355 structure. The size of this array is equal to the number 4356 of local rows, i.e 'm'. 4357 4358 Output Parameter: 4359 . A - the matrix 4360 4361 Options Database Keys: 4362 + -mat_no_inode - Do not use inodes 4363 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4364 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4365 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 
4366 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4367
4368 Level: intermediate
4369
4370 Notes:
4371 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4372 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4373 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4374
4375 If the *_nnz parameter is given then the *_nz parameter is ignored.
4376
4377 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4378 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4379 storage requirements for this matrix.
4380
4381 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4382 processor then it must be used on all processors that share the object for
4383 that argument.
4384
4385 The user MUST specify either the local or global matrix dimensions
4386 (possibly both).
4387
4388 The parallel matrix is partitioned across processors such that the
4389 first m0 rows belong to process 0, the next m1 rows belong to
4390 process 1, the next m2 rows belong to process 2, etc., where
4391 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4392 values corresponding to an [m x N] submatrix.
4393
4394 The columns are logically partitioned with the n0 columns belonging
4395 to the 0th partition, the next n1 columns belonging to the next
4396 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4397
4398 The DIAGONAL portion of the local submatrix on any given processor
4399 is the submatrix corresponding to the rows and columns m,n
4400 owned by the given processor, i.e., the diagonal submatrix on
4401 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4402 etc. The remaining portion of the local submatrix [m x (N-n)]
4403 constitutes the OFF-DIAGONAL portion. The example below better
4404 illustrates this concept.
4405
4406 For a square global matrix we define each processor's diagonal portion
4407 to be its local rows and the corresponding columns (a square submatrix);
4408 each processor's off-diagonal portion encompasses the remainder of the
4409 local matrix (a rectangular submatrix).
4410
4411 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4412
4413 When calling this routine with a single process communicator, a matrix of
4414 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
4415 type of communicator, use the construction mechanism
4416 .vb
4417 MatCreate(..., &A);
4418 MatSetType(A, MATMPIAIJ);
4419 MatSetSizes(A, m, n, M, N);
4420 MatMPIAIJSetPreallocation(A, ...);
4421 .ve
4422
4423 By default, this format uses inodes (identical nodes) when possible.
4424 We search for consecutive rows with the same nonzero structure, thereby
4425 reusing matrix information to achieve increased efficiency.
4426
4427 Example Usage:
4428 Consider the following 8x8 matrix with 34 non-zero values that is
4429 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4430 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
4431 as follows
4432
4433 .vb
4434 1 2 0 | 0 3 0 | 0 4
4435 Proc0 0 5 6 | 7 0 0 | 8 0
4436 9 0 10 | 11 0 0 | 12 0
4437 -------------------------------------
4438 13 0 14 | 15 16 17 | 0 0
4439 Proc1 0 18 0 | 19 20 21 | 0 0
4440 0 0 0 | 22 23 0 | 24 0
4441 -------------------------------------
4442 Proc2 25 26 27 | 0 0 28 | 29 0
4443 30 0 0 | 31 32 33 | 0 34
4444 .ve
4445
4446 This can be represented as a collection of submatrices as
4447
4448 .vb
4449 A B C
4450 D E F
4451 G H I
4452 .ve
4453
4454 The submatrices A,B,C are owned by proc0, D,E,F are
4455 owned by proc1, and G,H,I are owned by proc2.
4456
4457 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4458 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4459 The 'M','N' parameters are 8,8, and have the same values on all procs.
4460
4461 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4462 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4463 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4464 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4465 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4466 matrix, and [DF] as another `MATSEQAIJ` matrix.
4467
4468 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4469 allocated for every row of the local diagonal submatrix, and `o_nz`
4470 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4471 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros
4472 per row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4473 In this case, the values of `d_nz`,`o_nz` are
4474 .vb
4475 proc0 d_nz = 2, o_nz = 2
4476 proc1 d_nz = 3, o_nz = 2
4477 proc2 d_nz = 1, o_nz = 4
4478 .ve
4479 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4480 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4481 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4482 34 values.
4483
4484 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4485 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4486 In the above case the values for `d_nnz`,`o_nnz` are
4487 .vb
4488 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4489 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4490 proc2 d_nnz = [1,1] and o_nnz = [4,4]
4491 .ve
4492 Here the space allocated is the sum of all the above values, i.e., 34, and
4493 hence pre-allocation is perfect.
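   As a minimal sketch of the `d_nnz`/`o_nnz` variant for the example above (each rank passes its own sizes and
   arrays; the values shown here are proc0's, so this is illustrative rather than complete):
.vb
      Mat      A;
      PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; /* proc0's three rows of the 8x8 example */

      MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve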
4494
4495 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4496 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4497 @*/
4498 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4499 {
4500 PetscMPIInt size;
4501
4502 PetscFunctionBegin;
4503 PetscCall(MatCreate(comm, A));
4504 PetscCall(MatSetSizes(*A, m, n, M, N));
4505 PetscCallMPI(MPI_Comm_size(comm, &size));
4506 if (size > 1) {
4507 PetscCall(MatSetType(*A, MATMPIAIJ));
4508 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4509 } else {
4510 PetscCall(MatSetType(*A, MATSEQAIJ));
4511 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4512 }
4513 PetscFunctionReturn(PETSC_SUCCESS);
4514 }
4515
4516 /*MC
4517 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4518
4519 Synopsis:
4520 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)
4521
4522 Not Collective
4523
4524 Input Parameter:
4525 . A - the `MATMPIAIJ` matrix
4526
4527 Output Parameters:
4528 + Ad - the diagonal portion of the matrix
4529 . Ao - the off-diagonal portion of the matrix
4530 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4531 - ierr - error code
4532
4533 Level: advanced
4534
4535 Note:
4536 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`.
4537
4538 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4539 M*/
4540
4541 /*MC
4542 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4543
4544 Synopsis:
4545 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)
4546
4547 Not Collective
4548
4549 Input Parameters:
4550 + A - the `MATMPIAIJ` matrix
4551 . Ad - the diagonal portion of the matrix
4552 . Ao - the off-diagonal portion of the matrix
4553 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4554 - ierr - error code
4555
4556 Level: advanced
4557
4558 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4559 M*/
4560
4561 /*@C
4562 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4563
4564 Not Collective
4565
4566 Input Parameter:
4567 . A - The `MATMPIAIJ` matrix
4568
4569 Output Parameters:
4570 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4571 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4572 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4573
4574 Level: intermediate
4575
4576 Note:
4577 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4578 in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4579 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4580 local column numbers to global column numbers in the original matrix.
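   For example, a minimal sketch that maps the local columns of `Ao` back to global column indices (`A` is assumed
   to be an assembled `MATMPIAIJ`; the loop body is illustrative only):
.vb
      Mat            Ad, Ao;
      const PetscInt *colmap;
      PetscInt       nco;

      MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
      MatGetSize(Ao, NULL, &nco);      /* number of nonzero off-diagonal columns */
      for (PetscInt j = 0; j < nco; j++) {
        PetscInt gcol = colmap[j];     /* global column of local column j of Ao */
        ...
      }
.ve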
4581 4582 Fortran Notes: 4583 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4584 4585 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4586 @*/ 4587 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4588 { 4589 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4590 PetscBool flg; 4591 4592 PetscFunctionBegin; 4593 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4594 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4595 if (Ad) *Ad = a->A; 4596 if (Ao) *Ao = a->B; 4597 if (colmap) *colmap = a->garray; 4598 PetscFunctionReturn(PETSC_SUCCESS); 4599 } 4600 4601 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4602 { 4603 PetscInt m, N, i, rstart, nnz, Ii; 4604 PetscInt *indx; 4605 PetscScalar *values; 4606 MatType rootType; 4607 4608 PetscFunctionBegin; 4609 PetscCall(MatGetSize(inmat, &m, &N)); 4610 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4611 PetscInt *dnz, *onz, sum, bs, cbs; 4612 4613 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4614 /* Check sum(n) = N */ 4615 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4616 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4617 4618 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4619 rstart -= m; 4620 4621 MatPreallocateBegin(comm, m, n, dnz, onz); 4622 for (i = 0; i < m; i++) { 4623 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4624 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4625 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4626 } 4627 4628 PetscCall(MatCreate(comm, outmat)); 4629 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4630 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4631 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4632 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4633 PetscCall(MatSetType(*outmat, rootType)); 4634 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4635 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4636 MatPreallocateEnd(dnz, onz); 4637 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4638 } 4639 4640 /* numeric phase */ 4641 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4642 for (i = 0; i < m; i++) { 4643 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4644 Ii = i + rstart; 4645 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4646 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4647 } 4648 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4649 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4650 PetscFunctionReturn(PETSC_SUCCESS); 4651 } 4652 4653 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4654 { 4655 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4656 4657 PetscFunctionBegin; 4658 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4659 PetscCall(PetscFree(merge->id_r)); 4660 PetscCall(PetscFree(merge->len_s)); 4661 PetscCall(PetscFree(merge->len_r)); 4662 PetscCall(PetscFree(merge->bi)); 4663 PetscCall(PetscFree(merge->bj)); 
4664 PetscCall(PetscFree(merge->buf_ri[0])); 4665 PetscCall(PetscFree(merge->buf_ri)); 4666 PetscCall(PetscFree(merge->buf_rj[0])); 4667 PetscCall(PetscFree(merge->buf_rj)); 4668 PetscCall(PetscFree(merge->coi)); 4669 PetscCall(PetscFree(merge->coj)); 4670 PetscCall(PetscFree(merge->owners_co)); 4671 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4672 PetscCall(PetscFree(merge)); 4673 PetscFunctionReturn(PETSC_SUCCESS); 4674 } 4675 4676 #include <../src/mat/utils/freespace.h> 4677 #include <petscbt.h> 4678 4679 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4680 { 4681 MPI_Comm comm; 4682 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4683 PetscMPIInt size, rank, taga, *len_s; 4684 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4685 PetscInt proc, m; 4686 PetscInt **buf_ri, **buf_rj; 4687 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4688 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4689 MPI_Request *s_waits, *r_waits; 4690 MPI_Status *status; 4691 const MatScalar *aa, *a_a; 4692 MatScalar **abuf_r, *ba_i; 4693 Mat_Merge_SeqsToMPI *merge; 4694 PetscContainer container; 4695 4696 PetscFunctionBegin; 4697 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4698 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4699 4700 PetscCallMPI(MPI_Comm_size(comm, &size)); 4701 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4702 4703 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4704 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4705 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4706 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4707 aa = a_a; 4708 4709 bi = merge->bi; 4710 bj = merge->bj; 4711 buf_ri = merge->buf_ri; 4712 buf_rj = merge->buf_rj; 4713 4714 PetscCall(PetscMalloc1(size, &status)); 4715 owners = merge->rowmap->range; 4716 len_s = merge->len_s; 4717 4718 /* send and recv matrix values */ 4719 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4720 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4721 4722 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4723 for (proc = 0, k = 0; proc < size; proc++) { 4724 if (!len_s[proc]) continue; 4725 i = owners[proc]; 4726 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4727 k++; 4728 } 4729 4730 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4731 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4732 PetscCall(PetscFree(status)); 4733 4734 PetscCall(PetscFree(s_waits)); 4735 PetscCall(PetscFree(r_waits)); 4736 4737 /* insert mat values of mpimat */ 4738 PetscCall(PetscMalloc1(N, &ba_i)); 4739 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4740 4741 for (k = 0; k < merge->nrecv; k++) { 4742 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4743 nrows = *(buf_ri_k[k]); 4744 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4745 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4746 } 4747 4748 /* set values of ba */ 4749 m = merge->rowmap->n; 4750 for (i = 0; i < m; i++) { 4751 arow = owners[rank] + i; 4752 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4753 bnzi = bi[i + 1] - bi[i]; 4754 
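/* Row i of the result is accumulated in ba_i: zero it, add this rank's contribution from seqmat, then fold in
       the contributions received from the other ranks. The matching loops below advance through bj_i and aj in a
       single forward sweep, which is valid because both column-index lists are sorted. */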
PetscCall(PetscArrayzero(ba_i, bnzi)); 4755 4756 /* add local non-zero vals of this proc's seqmat into ba */ 4757 anzi = ai[arow + 1] - ai[arow]; 4758 aj = a->j + ai[arow]; 4759 aa = a_a + ai[arow]; 4760 nextaj = 0; 4761 for (j = 0; nextaj < anzi; j++) { 4762 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4763 ba_i[j] += aa[nextaj++]; 4764 } 4765 } 4766 4767 /* add received vals into ba */ 4768 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4769 /* i-th row */ 4770 if (i == *nextrow[k]) { 4771 anzi = *(nextai[k] + 1) - *nextai[k]; 4772 aj = buf_rj[k] + *(nextai[k]); 4773 aa = abuf_r[k] + *(nextai[k]); 4774 nextaj = 0; 4775 for (j = 0; nextaj < anzi; j++) { 4776 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4777 ba_i[j] += aa[nextaj++]; 4778 } 4779 } 4780 nextrow[k]++; 4781 nextai[k]++; 4782 } 4783 } 4784 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4785 } 4786 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4787 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4788 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4789 4790 PetscCall(PetscFree(abuf_r[0])); 4791 PetscCall(PetscFree(abuf_r)); 4792 PetscCall(PetscFree(ba_i)); 4793 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4794 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4795 PetscFunctionReturn(PETSC_SUCCESS); 4796 } 4797 4798 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4799 { 4800 Mat B_mpi; 4801 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4802 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4803 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4804 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4805 PetscInt len, proc, *dnz, *onz, bs, cbs; 4806 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4807 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4808 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4809 MPI_Status *status; 4810 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4811 PetscBT lnkbt; 4812 Mat_Merge_SeqsToMPI *merge; 4813 PetscContainer container; 4814 4815 PetscFunctionBegin; 4816 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4817 4818 /* make sure it is a PETSc comm */ 4819 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4820 PetscCallMPI(MPI_Comm_size(comm, &size)); 4821 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4822 4823 PetscCall(PetscNew(&merge)); 4824 PetscCall(PetscMalloc1(size, &status)); 4825 4826 /* determine row ownership */ 4827 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4828 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4829 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4830 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4831 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4832 PetscCall(PetscMalloc1(size, &len_si)); 4833 PetscCall(PetscMalloc1(size, &merge->len_s)); 4834 4835 m = merge->rowmap->n; 4836 owners = merge->rowmap->range; 4837 4838 /* determine the number of messages to send, their lengths */ 4839 len_s = merge->len_s; 4840 4841 len = 0; /* length of buf_si[] */ 4842 merge->nsend = 0; 4843 for (proc = 0; proc < size; proc++) { 4844 len_si[proc] = 0; 4845 if (proc == rank) { 4846 len_s[proc] = 0; 4847 } else { 4848 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4849 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4850 } 4851 if (len_s[proc]) { 4852 
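/* This rank sends at least one row to [proc]. Note that len_s[proc] counts matrix entries (the length of the
        j- and a-messages); the separate i-structure message is 2*(nrows+1) integers: a row count, the nonempty row
        indices (local to [proc]), and nrows+1 offsets into the j-message. */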
merge->nsend++; 4853 nrows = 0; 4854 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4855 if (ai[i + 1] > ai[i]) nrows++; 4856 } 4857 len_si[proc] = 2 * (nrows + 1); 4858 len += len_si[proc]; 4859 } 4860 } 4861 4862 /* determine the number and length of messages to receive for ij-structure */ 4863 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4864 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4865 4866 /* post the Irecv of j-structure */ 4867 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4868 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4869 4870 /* post the Isend of j-structure */ 4871 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4872 4873 for (proc = 0, k = 0; proc < size; proc++) { 4874 if (!len_s[proc]) continue; 4875 i = owners[proc]; 4876 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4877 k++; 4878 } 4879 4880 /* receives and sends of j-structure are complete */ 4881 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4882 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4883 4884 /* send and recv i-structure */ 4885 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4886 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4887 4888 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4889 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4890 for (proc = 0, k = 0; proc < size; proc++) { 4891 if (!len_s[proc]) continue; 4892 /* form outgoing message for i-structure: 4893 buf_si[0]: nrows to be sent 4894 [1:nrows]: row index (global) 4895 [nrows+1:2*nrows+1]: i-structure index 4896 */ 4897 nrows = len_si[proc] / 2 - 1; 4898 buf_si_i = buf_si + nrows + 1; 4899 buf_si[0] = nrows; 4900 buf_si_i[0] = 0; 4901 nrows = 0; 4902 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4903 anzi = ai[i + 1] - ai[i]; 4904 if (anzi) { 4905 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4906 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4907 nrows++; 4908 } 4909 } 4910 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4911 k++; 4912 buf_si += len_si[proc]; 4913 } 4914 4915 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4916 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4917 4918 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4919 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4920 4921 PetscCall(PetscFree(len_si)); 4922 PetscCall(PetscFree(len_ri)); 4923 PetscCall(PetscFree(rj_waits)); 4924 PetscCall(PetscFree2(si_waits, sj_waits)); 4925 PetscCall(PetscFree(ri_waits)); 4926 PetscCall(PetscFree(buf_s)); 4927 PetscCall(PetscFree(status)); 4928 4929 /* compute a local seq matrix in each processor */ 4930 /* allocate bi array and free space for accumulating nonzero column info */ 4931 PetscCall(PetscMalloc1(m + 1, &bi)); 4932 bi[0] = 0; 4933 4934 /* create and initialize a linked list */ 4935 nlnk = N + 1; 4936 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4937 4938 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4939 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4940 
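/* The merged column indices are collected in a free-space list that grows roughly geometrically (each new chunk
     is sized by the total already allocated); PetscFreeSpaceContiguous() later compacts the chunks into the final
     bj array. */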
PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4941
4942 current_space = free_space;
4943
4944 /* determine symbolic info for each local row */
4945 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4946
4947 for (k = 0; k < merge->nrecv; k++) {
4948 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4949 nrows = *buf_ri_k[k];
4950 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4951 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4952 }
4953
4954 MatPreallocateBegin(comm, m, n, dnz, onz);
4955 len = 0;
4956 for (i = 0; i < m; i++) {
4957 bnzi = 0;
4958 /* add local non-zero cols of this proc's seqmat into lnk */
4959 arow = owners[rank] + i;
4960 anzi = ai[arow + 1] - ai[arow];
4961 aj = a->j + ai[arow];
4962 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4963 bnzi += nlnk;
4964 /* add received col data into lnk */
4965 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4966 if (i == *nextrow[k]) { /* i-th row */
4967 anzi = *(nextai[k] + 1) - *nextai[k];
4968 aj = buf_rj[k] + *nextai[k];
4969 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4970 bnzi += nlnk;
4971 nextrow[k]++;
4972 nextai[k]++;
4973 }
4974 }
4975 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4976
4977 /* if free space is not available, make more free space */
4978 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4979 /* copy data into free space, then initialize lnk */
4980 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4981 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4982
4983 current_space->array += bnzi;
4984 current_space->local_used += bnzi;
4985 current_space->local_remaining -= bnzi;
4986
4987 bi[i + 1] = bi[i] + bnzi;
4988 }
4989
4990 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4991
4992 PetscCall(PetscMalloc1(bi[m] + 1, &bj));
4993 PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4994 PetscCall(PetscLLDestroy(lnk, lnkbt));
4995
4996 /* create symbolic parallel matrix B_mpi */
4997 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4998 PetscCall(MatCreate(comm, &B_mpi));
4999 if (n == PETSC_DECIDE) {
5000 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5001 } else {
5002 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5003 }
5004 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5005 PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5006 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5007 MatPreallocateEnd(dnz, onz);
5008 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5009
5010 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5011 B_mpi->assembled = PETSC_FALSE;
5012 merge->bi = bi;
5013 merge->bj = bj;
5014 merge->buf_ri = buf_ri;
5015 merge->buf_rj = buf_rj;
5016 merge->coi = NULL;
5017 merge->coj = NULL;
5018 merge->owners_co = NULL;
5019
5020 PetscCall(PetscCommDestroy(&comm));
5021
5022 /* attach the supporting struct to B_mpi for reuse */
5023 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5024 PetscCall(PetscContainerSetPointer(container, merge));
5025 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5026 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", 
(PetscObject)container));
5027 PetscCall(PetscContainerDestroy(&container));
5028 *mpimat = B_mpi;
5029
5030 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5031 PetscFunctionReturn(PETSC_SUCCESS);
5032 }
5033
5034 /*@C
5035 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5036 matrices from each processor
5037
5038 Collective
5039
5040 Input Parameters:
5041 + comm - the communicator the parallel matrix will live on
5042 . seqmat - the input sequential matrix (one per process)
5043 . m - number of local rows (or `PETSC_DECIDE`)
5044 . n - number of local columns (or `PETSC_DECIDE`)
5045 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5046
5047 Output Parameter:
5048 . mpimat - the parallel matrix generated
5049
5050 Level: advanced
5051
5052 Note:
5053 The dimensions of the sequential matrix in each processor MUST be the same.
5054 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5055 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5056
5057 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5058 @*/
5059 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5060 {
5061 PetscMPIInt size;
5062
5063 PetscFunctionBegin;
5064 PetscCallMPI(MPI_Comm_size(comm, &size));
5065 if (size == 1) {
5066 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5067 if (scall == MAT_INITIAL_MATRIX) {
5068 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5069 } else {
5070 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5071 }
5072 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5073 PetscFunctionReturn(PETSC_SUCCESS);
5074 }
5075 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5076 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5077 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5078 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5079 PetscFunctionReturn(PETSC_SUCCESS);
5080 }
5081
5082 /*@
5083 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5084
5085 Not Collective
5086
5087 Input Parameter:
5088 . A - the matrix
5089
5090 Output Parameter:
5091 . A_loc - the local sequential matrix generated
5092
5093 Level: developer
5094
5095 Notes:
5096 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5097 with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5098 `n` is the global column count obtained with `MatGetSize()`.
5099
5100 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5101
5102 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5103
5104 Destroy the matrix with `MatDestroy()`.
5105
5106 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5107 @*/
5108 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5109 {
5110 PetscBool mpi;
5111
5112 PetscFunctionBegin;
5113 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5114 if (mpi) {
5115 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5116 } else {
5117 *A_loc = A;
5118 PetscCall(PetscObjectReference((PetscObject)*A_loc));
5119 }
5120 PetscFunctionReturn(PETSC_SUCCESS);
5121 }
5122
5123 /*@
5124 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5125 5126 Not Collective 5127 5128 Input Parameters: 5129 + A - the matrix 5130 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5131 5132 Output Parameter: 5133 . A_loc - the local sequential matrix generated 5134 5135 Level: developer 5136 5137 Notes: 5138 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5139 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5140 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5141 5142 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5143 5144 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5145 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5146 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5147 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5148 5149 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5150 @*/ 5151 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5152 { 5153 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5154 Mat_SeqAIJ *mat, *a, *b; 5155 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5156 const PetscScalar *aa, *ba, *aav, *bav; 5157 PetscScalar *ca, *cam; 5158 PetscMPIInt size; 5159 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5160 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5161 PetscBool match; 5162 5163 PetscFunctionBegin; 5164 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5165 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5166 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5167 if (size == 1) { 5168 if (scall == MAT_INITIAL_MATRIX) { 5169 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5170 *A_loc = mpimat->A; 5171 } else if (scall == MAT_REUSE_MATRIX) { 5172 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5173 } 5174 PetscFunctionReturn(PETSC_SUCCESS); 5175 } 5176 5177 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5178 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5179 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5180 ai = a->i; 5181 aj = a->j; 5182 bi = b->i; 5183 bj = b->j; 5184 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5185 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5186 aa = aav; 5187 ba = bav; 5188 if (scall == MAT_INITIAL_MATRIX) { 5189 PetscCall(PetscMalloc1(1 + am, &ci)); 5190 ci[0] = 0; 5191 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5192 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5193 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5194 k = 0; 5195 for (i = 0; i < am; i++) { 5196 ncols_o = bi[i + 1] - bi[i]; 5197 ncols_d = ai[i + 1] - ai[i]; 5198 /* off-diagonal portion of A */ 5199 for (jo = 0; jo < ncols_o; jo++) { 5200 col = cmap[*bj]; 5201 if (col >= cstart) break; 5202 cj[k] = col; 5203 bj++; 5204 ca[k++] = *ba++; 5205 } 5206 /* diagonal portion of A */ 5207 for (j = 0; j < ncols_d; j++) { 5208 cj[k] = cstart + *aj++; 5209 ca[k++] 
= *aa++; 5210 } 5211 /* off-diagonal portion of A */ 5212 for (j = jo; j < ncols_o; j++) { 5213 cj[k] = cmap[*bj++]; 5214 ca[k++] = *ba++; 5215 } 5216 } 5217 /* put together the new matrix */ 5218 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5219 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5220 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5221 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5222 mat->free_a = PETSC_TRUE; 5223 mat->free_ij = PETSC_TRUE; 5224 mat->nonew = 0; 5225 } else if (scall == MAT_REUSE_MATRIX) { 5226 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5227 ci = mat->i; 5228 cj = mat->j; 5229 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5230 for (i = 0; i < am; i++) { 5231 /* off-diagonal portion of A */ 5232 ncols_o = bi[i + 1] - bi[i]; 5233 for (jo = 0; jo < ncols_o; jo++) { 5234 col = cmap[*bj]; 5235 if (col >= cstart) break; 5236 *cam++ = *ba++; 5237 bj++; 5238 } 5239 /* diagonal portion of A */ 5240 ncols_d = ai[i + 1] - ai[i]; 5241 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5242 /* off-diagonal portion of A */ 5243 for (j = jo; j < ncols_o; j++) { 5244 *cam++ = *ba++; 5245 bj++; 5246 } 5247 } 5248 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5249 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5250 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5251 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5252 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5253 PetscFunctionReturn(PETSC_SUCCESS); 5254 } 5255 5256 /*@ 5257 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5258 mlocal rows and n columns. 
Here n is the sum of the number of columns of the diagonal and off-diagonal parts.
5259
5260 Not Collective
5261
5262 Input Parameters:
5263 + A - the matrix
5264 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5265
5266 Output Parameters:
5267 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5268 - A_loc - the local sequential matrix generated
5269
5270 Level: developer
5271
5272 Note:
5273 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5274 part, then those associated with the off-diagonal part (in its local ordering).
5275
5276 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5277 @*/
5278 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5279 {
5280 Mat Ao, Ad;
5281 const PetscInt *cmap;
5282 PetscMPIInt size;
5283 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5284
5285 PetscFunctionBegin;
5286 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5287 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5288 if (size == 1) {
5289 if (scall == MAT_INITIAL_MATRIX) {
5290 PetscCall(PetscObjectReference((PetscObject)Ad));
5291 *A_loc = Ad;
5292 } else if (scall == MAT_REUSE_MATRIX) {
5293 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5294 }
5295 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5296 PetscFunctionReturn(PETSC_SUCCESS);
5297 }
5298 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5299 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5300 if (f) {
5301 PetscCall((*f)(A, scall, glob, A_loc));
5302 } else {
5303 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data;
5304 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data;
5305 Mat_SeqAIJ *c;
5306 PetscInt *ai = a->i, *aj = a->j;
5307 PetscInt *bi = b->i, *bj = b->j;
5308 PetscInt *ci, *cj;
5309 const PetscScalar *aa, *ba;
5310 PetscScalar *ca;
5311 PetscInt i, j, am, dn, on;
5312
5313 PetscCall(MatGetLocalSize(Ad, &am, &dn));
5314 PetscCall(MatGetLocalSize(Ao, NULL, &on));
5315 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5316 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5317 if (scall == MAT_INITIAL_MATRIX) {
5318 PetscInt k;
5319 PetscCall(PetscMalloc1(1 + am, &ci));
5320 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5321 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5322 ci[0] = 0;
5323 for (i = 0, k = 0; i < am; i++) {
5324 const PetscInt ncols_o = bi[i + 1] - bi[i];
5325 const PetscInt ncols_d = ai[i + 1] - ai[i];
5326 ci[i + 1] = ci[i] + ncols_o + ncols_d;
5327 /* diagonal portion of A */
5328 for (j = 0; j < ncols_d; j++, k++) {
5329 cj[k] = *aj++;
5330 ca[k] = *aa++;
5331 }
5332 /* off-diagonal portion of A */
5333 for (j = 0; j < ncols_o; j++, k++) {
5334 cj[k] = dn + *bj++;
5335 ca[k] = *ba++;
5336 }
5337 }
5338 /* put together the new matrix */
5339 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5340 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5341 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/
5342 c = (Mat_SeqAIJ *)(*A_loc)->data;
5343 c->free_a = PETSC_TRUE;
5344 c->free_ij = PETSC_TRUE;
5345 c->nonew = 0;
5346 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5347 } else if (scall == MAT_REUSE_MATRIX) {
5348 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5349 for (i = 0; i < am; i++) {
5350 const PetscInt ncols_d = ai[i + 1] - ai[i];
5351 const PetscInt ncols_o = bi[i + 1] - bi[i];
5352 /* diagonal portion of A */
5353 for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5354 /* off-diagonal portion of A */
5355 for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5356 }
5357 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5358 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5359 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5360 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5361 if (glob) {
5362 PetscInt cst, *gidx;
5363
5364 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5365 PetscCall(PetscMalloc1(dn + on, &gidx));
5366 for (i = 0; i < dn; i++) gidx[i] = cst + i;
5367 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5368 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5369 }
5370 }
5371 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5372 PetscFunctionReturn(PETSC_SUCCESS);
5373 }
5374
5375 /*@C
5376 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5377
5378 Not Collective
5379
5380 Input Parameters:
5381 + A - the matrix
5382 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5383 . row - index set of rows to extract (or `NULL`)
5384 - col - index set of columns to extract (or `NULL`)
5385
5386 Output Parameter:
5387 . A_loc - the local sequential matrix generated
5388
5389 Level: developer
5390
5391 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5392 @*/
5393 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5394 {
5395 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5396 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5397 IS isrowa, iscola;
5398 Mat *aloc;
5399 PetscBool match;
5400
5401 PetscFunctionBegin;
5402 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5403 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5404 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5405 if (!row) {
5406 start = A->rmap->rstart;
5407 end = A->rmap->rend;
5408 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5409 } else {
5410 isrowa = *row;
5411 }
5412 if (!col) {
5413 start = A->cmap->rstart;
5414 cmap = a->garray;
5415 nzA = a->A->cmap->n;
5416 nzB = a->B->cmap->n;
5417 PetscCall(PetscMalloc1(nzA + nzB, &idx));
5418 ncols = 0;
5419 for (i = 0; i < nzB; i++) {
5420 if (cmap[i] < start) idx[ncols++] = cmap[i];
5421 else break;
5422 }
5423 imark = i;
5424 for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5425 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5426 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5427 } else {
5428 iscola = *col;
5429 }
5430 if (scall != MAT_INITIAL_MATRIX) {
5431 PetscCall(PetscMalloc1(1, &aloc));
5432 aloc[0] = *A_loc;
5433 }
5434 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5435 if (!col) { /* attach global id of condensed columns */
5436 
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5437 }
5438 *A_loc = aloc[0];
5439 PetscCall(PetscFree(aloc));
5440 if (!row) PetscCall(ISDestroy(&isrowa));
5441 if (!col) PetscCall(ISDestroy(&iscola));
5442 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5443 PetscFunctionReturn(PETSC_SUCCESS);
5444 }
5445
5446 /*
5447 * Create a sequential AIJ matrix based on row indices. A whole column is extracted once a row is matched.
5448 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5449 * on a global size.
5450 * */
5451 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5452 {
5453 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data;
5454 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5455 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5456 PetscMPIInt owner;
5457 PetscSFNode *iremote, *oiremote;
5458 const PetscInt *lrowindices;
5459 PetscSF sf, osf;
5460 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j;
5461 PetscInt ontotalcols, dntotalcols, ntotalcols, nout;
5462 MPI_Comm comm;
5463 ISLocalToGlobalMapping mapping;
5464 const PetscScalar *pd_a, *po_a;
5465
5466 PetscFunctionBegin;
5467 PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5468 /* plocalsize is the number of roots
5469 * nrows is the number of leaves
5470 * */
5471 PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5472 PetscCall(ISGetLocalSize(rows, &nrows));
5473 PetscCall(PetscCalloc1(nrows, &iremote));
5474 PetscCall(ISGetIndices(rows, &lrowindices));
5475 for (i = 0; i < nrows; i++) {
5476 /* Find a remote index and an owner for a row
5477 * The row could be local or remote
5478 * */
5479 owner = 0;
5480 lidx = 0;
5481 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5482 iremote[i].index = lidx;
5483 iremote[i].rank = owner;
5484 }
5485 /* Create SF to communicate how many nonzero columns for each row */
5486 PetscCall(PetscSFCreate(comm, &sf));
5487 /* SF will figure out the number of nonzero columns for each row, and their
5488 * offsets
5489 * */
5490 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5491 PetscCall(PetscSFSetFromOptions(sf));
5492 PetscCall(PetscSFSetUp(sf));
5493
5494 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5495 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5496 PetscCall(PetscCalloc1(nrows, &pnnz));
5497 roffsets[0] = 0;
5498 roffsets[1] = 0;
5499 for (i = 0; i < plocalsize; i++) {
5500 /* diag */
5501 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5502 /* off diag */
5503 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5504 /* compute offsets so that we know the relative location of each row */
5505 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5506 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5507 }
5508 PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5509 PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5510 /* 'r' means root, and 'l' means leaf */
5511 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5512 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5513 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5514 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5515 PetscCall(PetscSFDestroy(&sf));
5516 PetscCall(PetscFree(roffsets));
5517 PetscCall(PetscFree(nrcols)); 5518 dntotalcols = 0; 5519 ontotalcols = 0; 5520 ncol = 0; 5521 for (i = 0; i < nrows; i++) { 5522 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5523 ncol = PetscMax(pnnz[i], ncol); 5524 /* diag */ 5525 dntotalcols += nlcols[i * 2 + 0]; 5526 /* off diag */ 5527 ontotalcols += nlcols[i * 2 + 1]; 5528 } 5529 /* We do not need to figure the right number of columns 5530 * since all the calculations will be done by going through the raw data 5531 * */ 5532 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5533 PetscCall(MatSetUp(*P_oth)); 5534 PetscCall(PetscFree(pnnz)); 5535 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5536 /* diag */ 5537 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5538 /* off diag */ 5539 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5540 /* diag */ 5541 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5542 /* off diag */ 5543 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5544 dntotalcols = 0; 5545 ontotalcols = 0; 5546 ntotalcols = 0; 5547 for (i = 0; i < nrows; i++) { 5548 owner = 0; 5549 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5550 /* Set iremote for diag matrix */ 5551 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5552 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5553 iremote[dntotalcols].rank = owner; 5554 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5555 ilocal[dntotalcols++] = ntotalcols++; 5556 } 5557 /* off diag */ 5558 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5559 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5560 oiremote[ontotalcols].rank = owner; 5561 oilocal[ontotalcols++] = ntotalcols++; 5562 } 5563 } 5564 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5565 PetscCall(PetscFree(loffsets)); 5566 PetscCall(PetscFree(nlcols)); 5567 PetscCall(PetscSFCreate(comm, &sf)); 5568 /* P serves as roots and P_oth is leaves 5569 * Diag matrix 5570 * */ 5571 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5572 PetscCall(PetscSFSetFromOptions(sf)); 5573 PetscCall(PetscSFSetUp(sf)); 5574 5575 PetscCall(PetscSFCreate(comm, &osf)); 5576 /* Off diag */ 5577 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5578 PetscCall(PetscSFSetFromOptions(osf)); 5579 PetscCall(PetscSFSetUp(osf)); 5580 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5581 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5582 /* We operate on the matrix internal data for saving memory */ 5583 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5584 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5585 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5586 /* Convert to global indices for diag matrix */ 5587 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5588 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5589 /* We want P_oth store global indices */ 5590 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5591 /* Use memory scalable approach */ 5592 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5593 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5594 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5595 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5596 /* 
Convert back to local indices */
5597 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5598 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5599 nout = 0;
5600 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5601 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
5602 PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5603 /* Exchange values */
5604 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5605 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5606 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5607 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5608 /* Stop PETSc from shrinking memory */
5609 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5610 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5611 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5612 /* Attach PetscSF objects to P_oth so that we can reuse them later */
5613 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5614 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5615 PetscCall(PetscSFDestroy(&sf));
5616 PetscCall(PetscSFDestroy(&osf));
5617 PetscFunctionReturn(PETSC_SUCCESS);
5618 }
5619
5620 /*
5621 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5622 * This supports MPIAIJ and MAIJ
5623 * */
5624 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5625 {
5626 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5627 Mat_SeqAIJ *p_oth;
5628 IS rows, map;
5629 PetscHMapI hamp;
5630 PetscInt i, htsize, *rowindices, off, *mapping, key, count;
5631 MPI_Comm comm;
5632 PetscSF sf, osf;
5633 PetscBool has;
5634
5635 PetscFunctionBegin;
5636 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5637 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5638 /* If it is the first time, create an index set of off-diag nonzero columns of A,
5639 * and then create a submatrix (that often is an overlapping matrix)
5640 * */
5641 if (reuse == MAT_INITIAL_MATRIX) {
5642 /* Use a hash table to figure out unique keys */
5643 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5644 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5645 count = 0;
5646 /* Assume that a->garray is sorted, otherwise the following does not make sense */
5647 for (i = 0; i < a->B->cmap->n; i++) {
5648 key = a->garray[i] / dof;
5649 PetscCall(PetscHMapIHas(hamp, key, &has));
5650 if (!has) {
5651 mapping[i] = count;
5652 PetscCall(PetscHMapISet(hamp, key, count++));
5653 } else {
5654 /* Current 'i' has the same key as the previous step */
5655 mapping[i] = count - 1;
5656 }
5657 }
5658 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5659 PetscCall(PetscHMapIGetSize(hamp, &htsize));
5660 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, "Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5661 PetscCall(PetscCalloc1(htsize, &rowindices));
5662 off = 0;
5663 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5664 PetscCall(PetscHMapIDestroy(&hamp));
5665 PetscCall(PetscSortInt(htsize, rowindices));
5666 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5667 /* In case the matrix was already 
created but users want to recreate the matrix */
5668 PetscCall(MatDestroy(P_oth));
5669 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5670 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5671 PetscCall(ISDestroy(&map));
5672 PetscCall(ISDestroy(&rows));
5673 } else if (reuse == MAT_REUSE_MATRIX) {
5674 /* If the matrix was already created, we simply update values using SF objects
5675 * that are attached to the matrix earlier.
5676 */
5677 const PetscScalar *pd_a, *po_a;
5678
5679 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5680 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5681 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5682 p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5683 /* Update values in place */
5684 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5685 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5686 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5687 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5688 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5689 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5690 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5691 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5692 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5693 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5694 PetscFunctionReturn(PETSC_SUCCESS);
5695 }
5696
5697 /*@C
5698 MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that correspond to the nonzero columns of local `A`
5699
5700 Collective
5701
5702 Input Parameters:
5703 + A - the first matrix in `MATMPIAIJ` format
5704 . B - the second matrix in `MATMPIAIJ` format
5705 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5706
5707 Output Parameters:
5708 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
5709 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
5710 - B_seq - the sequential matrix generated
5711
5712 Level: developer
5713
5714 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5715 @*/
5716 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5717 {
5718 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5719 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5720 IS isrowb, iscolb;
5721 Mat *bseq = NULL;
5722
5723 PetscFunctionBegin;
5724 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5725 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5726 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5727
5728 if (scall == MAT_INITIAL_MATRIX) {
5729 start = A->cmap->rstart;
5730 cmap = a->garray;
5731 nzA = a->A->cmap->n;
5732 nzB = a->B->cmap->n;
5733 PetscCall(PetscMalloc1(nzA + nzB, &idx));
5734 ncols = 0;
5735 for (i = 0; i < nzB; i++) { /* row < local row index */
5736 if (cmap[i] < start) idx[ncols++] = cmap[i];
5737 else break;
5738 }
5739 imark = i;
5740 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */
5741 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5742 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5743 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5744 } else {
5745 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5746 isrowb = *rowb;
5747 iscolb = *colb;
5748 PetscCall(PetscMalloc1(1, &bseq));
5749 bseq[0] = *B_seq;
5750 }
5751 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5752 *B_seq = bseq[0];
5753 PetscCall(PetscFree(bseq));
5754 if (!rowb) {
5755 PetscCall(ISDestroy(&isrowb));
5756 } else {
5757 *rowb = isrowb;
5758 }
5759 if (!colb) {
5760 PetscCall(ISDestroy(&iscolb));
5761 } else {
5762 *colb = iscolb;
5763 }
5764 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5765 PetscFunctionReturn(PETSC_SUCCESS);
5766 }
5767
5768 /*
5769 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5770 of the OFF-DIAGONAL portion of local A
5771
5772 Collective
5773
5774 Input Parameters:
5775 + A,B - the matrices in `MATMPIAIJ` format
5776 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5777
5778 Output Parameters:
5779 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5780 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5781 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5782 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5783
5784 Developer Note:
5785 This directly accesses information inside the VecScatter associated with the matrix-vector product
5786 for this matrix. This is not desirable.
5787 5788 Level: developer 5789 5790 */ 5791 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5792 { 5793 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5794 Mat_SeqAIJ *b_oth; 5795 VecScatter ctx; 5796 MPI_Comm comm; 5797 const PetscMPIInt *rprocs, *sprocs; 5798 const PetscInt *srow, *rstarts, *sstarts; 5799 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5800 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5801 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5802 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5803 PetscMPIInt size, tag, rank, nreqs; 5804 5805 PetscFunctionBegin; 5806 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5807 PetscCallMPI(MPI_Comm_size(comm, &size)); 5808 5809 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5810 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5811 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5812 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5813 5814 if (size == 1) { 5815 startsj_s = NULL; 5816 bufa_ptr = NULL; 5817 *B_oth = NULL; 5818 PetscFunctionReturn(PETSC_SUCCESS); 5819 } 5820 5821 ctx = a->Mvctx; 5822 tag = ((PetscObject)ctx)->tag; 5823 5824 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5825 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5826 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5827 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5828 PetscCall(PetscMalloc1(nreqs, &reqs)); 5829 rwaits = reqs; 5830 swaits = reqs + nrecvs; 5831 5832 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5833 if (scall == MAT_INITIAL_MATRIX) { 5834 /* i-array */ 5835 /* post receives */ 5836 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5837 for (i = 0; i < nrecvs; i++) { 5838 rowlen = rvalues + rstarts[i] * rbs; 5839 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5840 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5841 } 5842 5843 /* pack the outgoing message */ 5844 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5845 5846 sstartsj[0] = 0; 5847 rstartsj[0] = 0; 5848 len = 0; /* total length of j or a array to be sent */ 5849 if (nsends) { 5850 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5851 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5852 } 5853 for (i = 0; i < nsends; i++) { 5854 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5855 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5856 for (j = 0; j < nrows; j++) { 5857 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5858 for (l = 0; l < sbs; l++) { 5859 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5860 5861 rowlen[j * sbs + l] = ncols; 5862 5863 len += ncols; 5864 
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5865 } 5866 k++; 5867 } 5868 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5869 5870 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5871 } 5872 /* recvs and sends of i-array are completed */ 5873 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5874 PetscCall(PetscFree(svalues)); 5875 5876 /* allocate buffers for sending j and a arrays */ 5877 PetscCall(PetscMalloc1(len + 1, &bufj)); 5878 PetscCall(PetscMalloc1(len + 1, &bufa)); 5879 5880 /* create i-array of B_oth */ 5881 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5882 5883 b_othi[0] = 0; 5884 len = 0; /* total length of j or a array to be received */ 5885 k = 0; 5886 for (i = 0; i < nrecvs; i++) { 5887 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5888 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5889 for (j = 0; j < nrows; j++) { 5890 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5891 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5892 k++; 5893 } 5894 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5895 } 5896 PetscCall(PetscFree(rvalues)); 5897 5898 /* allocate space for j and a arrays of B_oth */ 5899 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5900 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5901 5902 /* j-array */ 5903 /* post receives of j-array */ 5904 for (i = 0; i < nrecvs; i++) { 5905 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5906 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5907 } 5908 5909 /* pack the outgoing message j-array */ 5910 if (nsends) k = sstarts[0]; 5911 for (i = 0; i < nsends; i++) { 5912 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5913 bufJ = bufj + sstartsj[i]; 5914 for (j = 0; j < nrows; j++) { 5915 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5916 for (ll = 0; ll < sbs; ll++) { 5917 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5918 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5919 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5920 } 5921 } 5922 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5923 } 5924 5925 /* recvs and sends of j-array are completed */ 5926 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5927 } else if (scall == MAT_REUSE_MATRIX) { 5928 sstartsj = *startsj_s; 5929 rstartsj = *startsj_r; 5930 bufa = *bufa_ptr; 5931 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5932 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5933 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Unknown reuse type"); 5934 5935 /* a-array */ 5936 /* post receives of a-array */ 5937 for (i = 0; i < nrecvs; i++) { 5938 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5939 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5940 } 5941 5942 /* pack the outgoing message a-array */ 5943 if (nsends) k = sstarts[0]; 5944 for (i = 0; i < nsends; i++) { 5945 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5946 bufA = bufa + sstartsj[i]; 5947 for (j = 0; j < nrows; j++) { 5948 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5949 for (ll = 0; ll < sbs; ll++) { 5950 PetscCall(MatGetRow_MPIAIJ(B, row +
ll, &ncols, NULL, &vals)); 5951 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5952 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5953 } 5954 } 5955 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5956 } 5957 /* recvs and sends of a-array are completed */ 5958 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5959 PetscCall(PetscFree(reqs)); 5960 5961 if (scall == MAT_INITIAL_MATRIX) { 5962 /* put together the new matrix */ 5963 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5964 5965 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5966 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5967 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5968 b_oth->free_a = PETSC_TRUE; 5969 b_oth->free_ij = PETSC_TRUE; 5970 b_oth->nonew = 0; 5971 5972 PetscCall(PetscFree(bufj)); 5973 if (!startsj_s || !bufa_ptr) { 5974 PetscCall(PetscFree2(sstartsj, rstartsj)); 5975 PetscCall(PetscFree(bufa)); 5976 } else { 5977 *startsj_s = sstartsj; 5978 *startsj_r = rstartsj; 5979 *bufa_ptr = bufa; 5980 } 5981 } else if (scall == MAT_REUSE_MATRIX) { 5982 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5983 } 5984 5985 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5986 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5987 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5988 PetscFunctionReturn(PETSC_SUCCESS); 5989 } 5990 5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5994 #if defined(PETSC_HAVE_MKL_SPARSE) 5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5996 #endif 5997 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5999 #if defined(PETSC_HAVE_ELEMENTAL) 6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6001 #endif 6002 #if defined(PETSC_HAVE_SCALAPACK) 6003 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6004 #endif 6005 #if defined(PETSC_HAVE_HYPRE) 6006 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6007 #endif 6008 #if defined(PETSC_HAVE_CUDA) 6009 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6010 #endif 6011 #if defined(PETSC_HAVE_HIP) 6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6013 #endif 6014 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6016 #endif 6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6018 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6019 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6020 6021 /* 6022 Computes C = A*B as (B'*A')' since computing A*B directly is untenable. 6023 6024 Here A is m-by-n (MPIDENSE), B is n-by-p (MPIAIJ), and C = A*B is m-by-p.
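   In matrix notation the identity used below is C = A*B = (B^T * A^T)^T: transposing both operands turns the
   unsupported MPIDENSE*MPIAIJ product into an MPIAIJ*MPIDENSE product that PETSc does provide, at the cost of
   two explicit transposes of the operands plus one transpose of the result.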
6028 6029 */ 6030 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6031 { 6032 Mat At, Bt, Ct; 6033 6034 PetscFunctionBegin; 6035 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6036 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6037 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6038 PetscCall(MatDestroy(&At)); 6039 PetscCall(MatDestroy(&Bt)); 6040 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6041 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6042 PetscCall(MatDestroy(&Ct)); 6043 PetscFunctionReturn(PETSC_SUCCESS); 6044 } 6045 6046 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6047 { 6048 PetscBool cisdense; 6049 6050 PetscFunctionBegin; 6051 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6052 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6053 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6054 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6055 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6056 PetscCall(MatSetUp(C)); 6057 6058 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6059 PetscFunctionReturn(PETSC_SUCCESS); 6060 } 6061 6062 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6063 { 6064 Mat_Product *product = C->product; 6065 Mat A = product->A, B = product->B; 6066 6067 PetscFunctionBegin; 6068 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6069 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6070 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6071 C->ops->productsymbolic = MatProductSymbolic_AB; 6072 PetscFunctionReturn(PETSC_SUCCESS); 6073 } 6074 6075 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6076 { 6077 Mat_Product *product = C->product; 6078 6079 PetscFunctionBegin; 6080 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6081 PetscFunctionReturn(PETSC_SUCCESS); 6082 } 6083 6084 /* 6085 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6086 6087 Input Parameters: 6088 6089 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6090 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6091 6092 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6093 6094 For Set1, j1[] contains column indices of the nonzeros. 6095 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6096 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6097 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6098 6099 Similarly for Set2. 6100 6101 This routine merges the two sets of nonzeros row by row and removes repeats. 6102 6103 Output Parameters: (memory is allocated by the caller) 6104 6105 i[],j[]: the CSR of the merged matrix, which has m rows. 6106 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...)
corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6107 imap2[]: similar to imap1[], but for Set2. 6108 Note we order nonzeros row-by-row and from left to right. 6109 */ 6110 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6111 { 6112 PetscInt r, m; /* Row index of mat */ 6113 PetscCount t, t1, t2, b1, e1, b2, e2; 6114 6115 PetscFunctionBegin; 6116 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6117 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */ 6118 i[0] = 0; 6119 for (r = 0; r < m; r++) { /* Do row by row merging */ 6120 b1 = rowBegin1[r]; 6121 e1 = rowEnd1[r]; 6122 b2 = rowBegin2[r]; 6123 e2 = rowEnd2[r]; 6124 while (b1 < e1 && b2 < e2) { 6125 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6126 j[t] = j1[b1]; 6127 imap1[t1] = t; 6128 imap2[t2] = t; 6129 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */ 6130 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */ 6131 t1++; 6132 t2++; 6133 t++; 6134 } else if (j1[b1] < j2[b2]) { 6135 j[t] = j1[b1]; 6136 imap1[t1] = t; 6137 b1 += jmap1[t1 + 1] - jmap1[t1]; 6138 t1++; 6139 t++; 6140 } else { 6141 j[t] = j2[b2]; 6142 imap2[t2] = t; 6143 b2 += jmap2[t2 + 1] - jmap2[t2]; 6144 t2++; 6145 t++; 6146 } 6147 } 6148 /* Merge the remaining in either j1[] or j2[] */ 6149 while (b1 < e1) { 6150 j[t] = j1[b1]; 6151 imap1[t1] = t; 6152 b1 += jmap1[t1 + 1] - jmap1[t1]; 6153 t1++; 6154 t++; 6155 } 6156 while (b2 < e2) { 6157 j[t] = j2[b2]; 6158 imap2[t2] = t; 6159 b2 += jmap2[t2 + 1] - jmap2[t2]; 6160 t2++; 6161 t++; 6162 } 6163 i[r + 1] = t; 6164 } 6165 PetscFunctionReturn(PETSC_SUCCESS); 6166 } 6167 6168 /* 6169 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6170 6171 Input Parameters: 6172 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6173 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6174 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6175 6176 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6177 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6178 6179 Output Parameters: 6180 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6181 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6182 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6183 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6184 6185 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6186 Atot: number of entries belonging to the diagonal block. 6187 Annz: number of unique nonzeros belonging to the diagonal block. 6188 Aperm[] stores values from perm[] for entries belonging to the diagonal block. Its length is Atot, which also counts 6189 repeats (i.e., entries with the same 'i,j' pair).
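     For example (an illustrative case added for clarity): if the diagonal block of one row receives the sorted column
     indices {3,3,7} coming from perm[] positions {5,9,2}, that row contributes 3 to Atot and 2 to Annz, appends 5,9,2
     to Aperm[], and its two unique columns contribute repeat counts 2 and 1 to the Ajmap[] array described next.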
6190 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6191 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6192 6193 6194 6195 6196 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6197 6198 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6199 */ 6200 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6201 { 6202 PetscInt cstart, cend, rstart, rend, row, col; 6203 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6204 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6205 PetscCount k, m, p, q, r, s, mid; 6206 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6207 6208 PetscFunctionBegin; 6209 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6210 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6211 m = rend - rstart; 6212 6213 /* Skip negative rows */ 6214 for (k = 0; k < n; k++) 6215 if (i[k] >= 0) break; 6216 6217 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6218 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6219 */ 6220 while (k < n) { 6221 row = i[k]; 6222 /* Entries in [k,s) are in one row.
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6223 for (s = k; s < n; s++) 6224 if (i[s] != row) break; 6225 6226 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6227 for (p = k; p < s; p++) { 6228 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6229 else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6230 } 6231 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6232 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6233 rowBegin[row - rstart] = k; 6234 rowMid[row - rstart] = mid; 6235 rowEnd[row - rstart] = s; 6236 6237 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6238 Atot += mid - k; 6239 Btot += s - mid; 6240 6241 /* Count unique nonzeros of this diag row */ 6242 for (p = k; p < mid;) { 6243 col = j[p]; 6244 do { 6245 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6246 p++; 6247 } while (p < mid && j[p] == col); 6248 Annz++; 6249 } 6250 6251 /* Count unique nonzeros of this offdiag row */ 6252 for (p = mid; p < s;) { 6253 col = j[p]; 6254 do { 6255 p++; 6256 } while (p < s && j[p] == col); 6257 Bnnz++; 6258 } 6259 k = s; 6260 } 6261 6262 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6263 PetscCall(PetscMalloc1(Atot, &Aperm)); 6264 PetscCall(PetscMalloc1(Btot, &Bperm)); 6265 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6266 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6267 6268 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6269 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6270 for (r = 0; r < m; r++) { 6271 k = rowBegin[r]; 6272 mid = rowMid[r]; 6273 s = rowEnd[r]; 6274 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6275 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6276 Atot += mid - k; 6277 Btot += s - mid; 6278 6279 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6280 for (p = k; p < mid;) { 6281 col = j[p]; 6282 q = p; 6283 do { 6284 p++; 6285 } while (p < mid && j[p] == col); 6286 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6287 Annz++; 6288 } 6289 6290 for (p = mid; p < s;) { 6291 col = j[p]; 6292 q = p; 6293 do { 6294 p++; 6295 } while (p < s && j[p] == col); 6296 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6297 Bnnz++; 6298 } 6299 } 6300 /* Output */ 6301 *Aperm_ = Aperm; 6302 *Annz_ = Annz; 6303 *Atot_ = Atot; 6304 *Ajmap_ = Ajmap; 6305 *Bperm_ = Bperm; 6306 *Bnnz_ = Bnnz; 6307 *Btot_ = Btot; 6308 *Bjmap_ = Bjmap; 6309 PetscFunctionReturn(PETSC_SUCCESS); 6310 } 6311 6312 /* 6313 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6314 6315 Input Parameters: 6316 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6317 nnz: number of unique nonzeros in the merged matrix 6318 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6319 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6320 6321 Output Parameter: (memory is allocated by the caller) 6322 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6323 6324 Example: 6325 nnz1 = 4 6326 nnz = 6 6327 imap = [1,3,4,5] 6328 jmap = [0,3,5,6,7] 6329 then, 6330 jmap_new = [0,0,3,3,5,6,7] 6331 */ 6332 static PetscErrorCode
ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6333 { 6334 PetscCount k, p; 6335 6336 PetscFunctionBegin; 6337 jmap_new[0] = 0; 6338 p = nnz; /* p loops over jmap_new[] backwards */ 6339 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6340 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6341 } 6342 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6343 PetscFunctionReturn(PETSC_SUCCESS); 6344 } 6345 6346 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6347 { 6348 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6349 6350 PetscFunctionBegin; 6351 PetscCall(PetscSFDestroy(&coo->sf)); 6352 PetscCall(PetscFree(coo->Aperm1)); 6353 PetscCall(PetscFree(coo->Bperm1)); 6354 PetscCall(PetscFree(coo->Ajmap1)); 6355 PetscCall(PetscFree(coo->Bjmap1)); 6356 PetscCall(PetscFree(coo->Aimap2)); 6357 PetscCall(PetscFree(coo->Bimap2)); 6358 PetscCall(PetscFree(coo->Aperm2)); 6359 PetscCall(PetscFree(coo->Bperm2)); 6360 PetscCall(PetscFree(coo->Ajmap2)); 6361 PetscCall(PetscFree(coo->Bjmap2)); 6362 PetscCall(PetscFree(coo->Cperm1)); 6363 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6364 PetscCall(PetscFree(coo)); 6365 PetscFunctionReturn(PETSC_SUCCESS); 6366 } 6367 6368 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6369 { 6370 MPI_Comm comm; 6371 PetscMPIInt rank, size; 6372 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6373 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6374 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6375 PetscContainer container; 6376 MatCOOStruct_MPIAIJ *coo; 6377 6378 PetscFunctionBegin; 6379 PetscCall(PetscFree(mpiaij->garray)); 6380 PetscCall(VecDestroy(&mpiaij->lvec)); 6381 #if defined(PETSC_USE_CTABLE) 6382 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6383 #else 6384 PetscCall(PetscFree(mpiaij->colmap)); 6385 #endif 6386 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6387 mat->assembled = PETSC_FALSE; 6388 mat->was_assembled = PETSC_FALSE; 6389 6390 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6391 PetscCallMPI(MPI_Comm_size(comm, &size)); 6392 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6393 PetscCall(PetscLayoutSetUp(mat->rmap)); 6394 PetscCall(PetscLayoutSetUp(mat->cmap)); 6395 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6396 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6397 PetscCall(MatGetLocalSize(mat, &m, &n)); 6398 PetscCall(MatGetSize(mat, &M, &N)); 6399 6400 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6401 /* entries come first, then local rows, then remote rows. */ 6402 PetscCount n1 = coo_n, *perm1; 6403 PetscInt *i1 = coo_i, *j1 = coo_j; 6404 6405 PetscCall(PetscMalloc1(n1, &perm1)); 6406 for (k = 0; k < n1; k++) perm1[k] = k; 6407 6408 /* Manipulate indices so that entries with negative row or col indices will have smallest 6409 row indices, local entries will have greater but negative row indices, and remote entries 6410 will have positive row indices. 
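     For example (illustrative values): with rstart=100 and rend=200 on this rank, an entry with i=-1 becomes
     i=PETSC_MIN_INT (to be ignored), an entry with i=150 becomes 150-PETSC_MAX_INT (local, negative), and an
     entry with i=300 stays 300 (remote, positive), so a single sort by row leaves the three groups contiguous
     in exactly that order.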
6411 */ 6412 for (k = 0; k < n1; k++) { 6413 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6414 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6415 else { 6416 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6417 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6418 } 6419 } 6420 6421 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6422 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6423 6424 /* Advance k to the first entry we need to take care of */ 6425 for (k = 0; k < n1; k++) 6426 if (i1[k] > PETSC_MIN_INT) break; 6427 PetscInt i1start = k; 6428 6429 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6430 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6431 6432 /* Send remote rows to their owner */ 6433 /* Find which rows should be sent to which remote ranks*/ 6434 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6435 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6436 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6437 const PetscInt *ranges; 6438 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6439 6440 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6441 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6442 for (k = rem; k < n1;) { 6443 PetscMPIInt owner; 6444 PetscInt firstRow, lastRow; 6445 6446 /* Locate a row range */ 6447 firstRow = i1[k]; /* first row of this owner */ 6448 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6449 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6450 6451 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6452 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6453 6454 /* All entries in [k,p) belong to this remote owner */ 6455 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6456 PetscMPIInt *sendto2; 6457 PetscInt *nentries2; 6458 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6459 6460 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6461 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6462 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6463 PetscCall(PetscFree2(sendto, nentries)); 6464 sendto = sendto2; 6465 nentries = nentries2; 6466 maxNsend = maxNsend2; 6467 } 6468 sendto[nsend] = owner; 6469 6470 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6471 nsend++; 6472 k = p; 6473 } 6474 6475 /* Build 1st SF to know offsets on remote to send data */ 6476 PetscSF sf1; 6477 PetscInt nroots = 1, nroots2 = 0; 6478 PetscInt nleaves = nsend, nleaves2 = 0; 6479 PetscInt *offsets; 6480 PetscSFNode *iremote; 6481 6482 PetscCall(PetscSFCreate(comm, &sf1)); 6483 PetscCall(PetscMalloc1(nsend, &iremote)); 6484 PetscCall(PetscMalloc1(nsend, &offsets)); 6485 for (k = 0; k < nsend; k++) { 6486 iremote[k].rank = sendto[k]; 6487 iremote[k].index = 0; 6488 nleaves2 += nentries[k]; 6489 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6490 } 6491 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6492 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6493 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6494 PetscCall(PetscSFDestroy(&sf1)); 6495 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6496 6497 /* Build 2nd SF to send remote COOs to their owner */ 6498 PetscSF sf2; 6499 nroots = nroots2; 6500 nleaves = nleaves2; 6501 PetscCall(PetscSFCreate(comm, &sf2)); 6502 PetscCall(PetscSFSetFromOptions(sf2)); 6503 PetscCall(PetscMalloc1(nleaves, &iremote)); 6504 p = 0; 6505 for (k = 0; k < nsend; k++) { 6506 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6507 for (q = 0; q < nentries[k]; q++, p++) { 6508 iremote[p].rank = sendto[k]; 6509 iremote[p].index = offsets[k] + q; 6510 } 6511 } 6512 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6513 6514 /* Send the remote COOs to their owner */ 6515 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6516 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6517 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6518 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6519 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6520 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6521 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6522 6523 PetscCall(PetscFree(offsets)); 6524 PetscCall(PetscFree2(sendto, nentries)); 6525 6526 /* Sort received COOs by row along with the permutation array */ 6527 for (k = 0; k < n2; k++) perm2[k] = k; 6528 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6529 6530 /* sf2
only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6531 PetscCount *Cperm1; 6532 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6533 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6534 6535 /* Support for HYPRE matrices, kind of a hack. 6536 Swap min column with diagonal so that diagonal values will go first */ 6537 PetscBool hypre; 6538 const char *name; 6539 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6540 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6541 if (hypre) { 6542 PetscInt *minj; 6543 PetscBT hasdiag; 6544 6545 PetscCall(PetscBTCreate(m, &hasdiag)); 6546 PetscCall(PetscMalloc1(m, &minj)); 6547 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6548 for (k = i1start; k < rem; k++) { 6549 if (j1[k] < cstart || j1[k] >= cend) continue; 6550 const PetscInt rindex = i1[k] - rstart; 6551 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6552 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6553 } 6554 for (k = 0; k < n2; k++) { 6555 if (j2[k] < cstart || j2[k] >= cend) continue; 6556 const PetscInt rindex = i2[k] - rstart; 6557 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6558 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6559 } 6560 for (k = i1start; k < rem; k++) { 6561 const PetscInt rindex = i1[k] - rstart; 6562 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6563 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6564 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6565 } 6566 for (k = 0; k < n2; k++) { 6567 const PetscInt rindex = i2[k] - rstart; 6568 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6569 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6570 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6571 } 6572 PetscCall(PetscBTDestroy(&hasdiag)); 6573 PetscCall(PetscFree(minj)); 6574 } 6575 6576 /* Split local COOs and received COOs into diag/offdiag portions */ 6577 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6578 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6579 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6580 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6581 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6582 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6583 6584 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6585 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6586 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6587 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6588 6589 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6590 PetscInt *Ai, *Bi; 6591 PetscInt *Aj, *Bj; 6592 6593 PetscCall(PetscMalloc1(m + 1, &Ai)); 6594 PetscCall(PetscMalloc1(m + 1, &Bi)); 6595 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6596 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6597 6598 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6599 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6600 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6601 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6602 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6603 6604 PetscCall(MatMergeEntries_Internal(mat, j1, j2, 
rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6605 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6606 6607 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6608 /* expect nonzeros in A/B most likely have local contributing entries */ 6609 PetscInt Annz = Ai[m]; 6610 PetscInt Bnnz = Bi[m]; 6611 PetscCount *Ajmap1_new, *Bjmap1_new; 6612 6613 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6614 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6615 6616 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6617 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6618 6619 PetscCall(PetscFree(Aimap1)); 6620 PetscCall(PetscFree(Ajmap1)); 6621 PetscCall(PetscFree(Bimap1)); 6622 PetscCall(PetscFree(Bjmap1)); 6623 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6624 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6625 PetscCall(PetscFree(perm1)); 6626 PetscCall(PetscFree3(i2, j2, perm2)); 6627 6628 Ajmap1 = Ajmap1_new; 6629 Bjmap1 = Bjmap1_new; 6630 6631 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6632 if (Annz < Annz1 + Annz2) { 6633 PetscInt *Aj_new; 6634 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6635 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6636 PetscCall(PetscFree(Aj)); 6637 Aj = Aj_new; 6638 } 6639 6640 if (Bnnz < Bnnz1 + Bnnz2) { 6641 PetscInt *Bj_new; 6642 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6643 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6644 PetscCall(PetscFree(Bj)); 6645 Bj = Bj_new; 6646 } 6647 6648 /* Create new submatrices for on-process and off-process coupling */ 6649 PetscScalar *Aa, *Ba; 6650 MatType rtype; 6651 Mat_SeqAIJ *a, *b; 6652 PetscObjectState state; 6653 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6654 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6655 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6656 if (cstart) { 6657 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6658 } 6659 PetscCall(MatDestroy(&mpiaij->A)); 6660 PetscCall(MatDestroy(&mpiaij->B)); 6661 PetscCall(MatGetRootType_Private(mat, &rtype)); 6662 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6663 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6664 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6665 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6666 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6667 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6668 6669 a = (Mat_SeqAIJ *)mpiaij->A->data; 6670 b = (Mat_SeqAIJ *)mpiaij->B->data; 6671 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6672 a->free_a = b->free_a = PETSC_TRUE; 6673 a->free_ij = b->free_ij = PETSC_TRUE; 6674 6675 /* conversion must happen AFTER multiply setup */ 6676 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6677 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6678 PetscCall(VecDestroy(&mpiaij->lvec)); 6679 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6680 6681 // Put the COO struct in a container and then attach that to the matrix 6682 PetscCall(PetscMalloc1(1, &coo)); 6683 coo->n = coo_n; 6684 coo->sf = sf2; 6685 coo->sendlen 
= nleaves; 6686 coo->recvlen = nroots; 6687 coo->Annz = Annz; 6688 coo->Bnnz = Bnnz; 6689 coo->Annz2 = Annz2; 6690 coo->Bnnz2 = Bnnz2; 6691 coo->Atot1 = Atot1; 6692 coo->Atot2 = Atot2; 6693 coo->Btot1 = Btot1; 6694 coo->Btot2 = Btot2; 6695 coo->Ajmap1 = Ajmap1; 6696 coo->Aperm1 = Aperm1; 6697 coo->Bjmap1 = Bjmap1; 6698 coo->Bperm1 = Bperm1; 6699 coo->Aimap2 = Aimap2; 6700 coo->Ajmap2 = Ajmap2; 6701 coo->Aperm2 = Aperm2; 6702 coo->Bimap2 = Bimap2; 6703 coo->Bjmap2 = Bjmap2; 6704 coo->Bperm2 = Bperm2; 6705 coo->Cperm1 = Cperm1; 6706 // Allocate in preallocation. If not used, it has zero cost on host 6707 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6708 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6709 PetscCall(PetscContainerSetPointer(container, coo)); 6710 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6711 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6712 PetscCall(PetscContainerDestroy(&container)); 6713 PetscFunctionReturn(PETSC_SUCCESS); 6714 } 6715 6716 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6717 { 6718 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6719 Mat A = mpiaij->A, B = mpiaij->B; 6720 PetscScalar *Aa, *Ba; 6721 PetscScalar *sendbuf, *recvbuf; 6722 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6723 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6724 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6725 const PetscCount *Cperm1; 6726 PetscContainer container; 6727 MatCOOStruct_MPIAIJ *coo; 6728 6729 PetscFunctionBegin; 6730 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6731 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6732 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6733 sendbuf = coo->sendbuf; 6734 recvbuf = coo->recvbuf; 6735 Ajmap1 = coo->Ajmap1; 6736 Ajmap2 = coo->Ajmap2; 6737 Aimap2 = coo->Aimap2; 6738 Bjmap1 = coo->Bjmap1; 6739 Bjmap2 = coo->Bjmap2; 6740 Bimap2 = coo->Bimap2; 6741 Aperm1 = coo->Aperm1; 6742 Aperm2 = coo->Aperm2; 6743 Bperm1 = coo->Bperm1; 6744 Bperm2 = coo->Bperm2; 6745 Cperm1 = coo->Cperm1; 6746 6747 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6748 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6749 6750 /* Pack entries to be sent to remote */ 6751 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6752 6753 /* Send remote entries to their owner and overlap the communication with local computation */ 6754 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6755 /* Add local entries to A and B */ 6756 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6757 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6758 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6759 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6760 } 6761 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6762 PetscScalar sum = 0.0; 6763 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6764 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6765 } 6766 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6767 6768 /* Add received remote entries to A and B */ 6769 for (PetscCount i = 0; i < coo->Annz2; i++) { 6770 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6771 } 6772 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6773 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6774 } 6775 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6776 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6777 PetscFunctionReturn(PETSC_SUCCESS); 6778 } 6779 6780 /*MC 6781 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6782 6783 Options Database Key: 6784 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6785 6786 Level: beginner 6787 6788 Notes: 6789 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values; 6790 in this case the values associated with the rows and columns one passes in are set to zero 6791 in the matrix. 6792 6793 `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no 6794 space is allocated for the nonzero entries, and any entries passed with `MatSetValues()` are ignored. 6795 6796 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6797 M*/ 6798 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6799 { 6800 Mat_MPIAIJ *b; 6801 PetscMPIInt size; 6802 6803 PetscFunctionBegin; 6804 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6805 6806 PetscCall(PetscNew(&b)); 6807 B->data = (void *)b; 6808 B->ops[0] = MatOps_Values; 6809 B->assembled = PETSC_FALSE; 6810 B->insertmode = NOT_SET_VALUES; 6811 b->size = size; 6812 6813 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6814 6815 /* build cache for off array entries formed */ 6816 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6817 6818 b->donotstash = PETSC_FALSE; 6819 b->colmap = NULL; 6820 b->garray = NULL; 6821 b->roworiented = PETSC_TRUE; 6822 6823 /* stuff used for matrix vector multiply */ 6824 b->lvec = NULL; 6825 b->Mvctx = NULL; 6826 6827 /* stuff for MatGetRow() */ 6828 b->rowindices = NULL; 6829 b->rowvalues = NULL; 6830 b->getrowactive = PETSC_FALSE; 6831 6832 /* flexible pointer used in CUSPARSE classes */ 6833 b->spptr = NULL; 6834 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6837 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C",
MatConvert_MPIAIJ_MPIAIJPERM)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6845 #if defined(PETSC_HAVE_CUDA) 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6847 #endif 6848 #if defined(PETSC_HAVE_HIP) 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6850 #endif 6851 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6853 #endif 6854 #if defined(PETSC_HAVE_MKL_SPARSE) 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6856 #endif 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6860 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6861 #if defined(PETSC_HAVE_ELEMENTAL) 6862 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6863 #endif 6864 #if defined(PETSC_HAVE_SCALAPACK) 6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6866 #endif 6867 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6868 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6869 #if defined(PETSC_HAVE_HYPRE) 6870 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6871 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6872 #endif 6873 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6875 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6876 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6877 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6878 PetscFunctionReturn(PETSC_SUCCESS); 6879 } 6880 6881 /*@C 6882 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6883 and "off-diagonal" part of the matrix in CSR format. 6884 6885 Collective 6886 6887 Input Parameters: 6888 + comm - MPI communicator 6889 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6890 . n - This value should be the same as the local size used in creating the 6891 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6892 calculated if `N` is given) For square matrices `n` is almost always `m`. 6893 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6894 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6895 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6896 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6897 . a - matrix values 6898 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6899 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6900 - oa - matrix values 6901 6902 Output Parameter: 6903 . mat - the matrix 6904 6905 Level: advanced 6906 6907 Notes: 6908 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6909 must free the arrays once the matrix has been destroyed and not before. 6910 6911 The `i` and `j` indices are 0 based 6912 6913 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6914 6915 This sets local rows and cannot be used to set off-processor values. 6916 6917 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6918 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6919 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6920 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6921 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6922 communication if it is known that only local entries will be set. 
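   Example Usage:
   An illustrative sketch of the calling sequence (not from the PETSc source); the values are those rank 0 of a
   two-rank run would pass for a 2x4 matrix with one diagonal-block and one off-diagonal-block entry in its row.
.vb
  PetscInt    i[]  = {0, 1};  // one nonzero in this rank's diagonal block row
  PetscInt    j[]  = {0};     // local column index within the diagonal block
  PetscScalar a[]  = {1.0};
  PetscInt    oi[] = {0, 1};  // one nonzero in this rank's off-diagonal block row
  PetscInt    oj[] = {2};     // global column index outside the diagonal block
  PetscScalar oa[] = {2.0};
  Mat         A;

  PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 1, 2, 2, 4, i, j, a, oi, oj, oa, &A));
  // ... use A; the arrays must stay valid until A is destroyed ...
  PetscCall(MatDestroy(&A));
.ve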
6923 6924 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6925 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6926 @*/ 6927 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6928 { 6929 Mat_MPIAIJ *maij; 6930 6931 PetscFunctionBegin; 6932 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6933 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6934 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6935 PetscCall(MatCreate(comm, mat)); 6936 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6937 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6938 maij = (Mat_MPIAIJ *)(*mat)->data; 6939 6940 (*mat)->preallocated = PETSC_TRUE; 6941 6942 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6943 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6944 6945 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6946 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6947 6948 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6949 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6950 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6951 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6952 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6953 PetscFunctionReturn(PETSC_SUCCESS); 6954 } 6955 6956 typedef struct { 6957 Mat *mp; /* intermediate products */ 6958 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6959 PetscInt cp; /* number of intermediate products */ 6960 6961 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6962 PetscInt *startsj_s, *startsj_r; 6963 PetscScalar *bufa; 6964 Mat P_oth; 6965 6966 /* may take advantage of merging product->B */ 6967 Mat Bloc; /* B-local by merging diag and off-diag */ 6968 6969 /* cusparse does not have support to split between symbolic and numeric phases. 6970 When api_user is true, we don't need to update the numerical values 6971 of the temporary storage */ 6972 PetscBool reusesym; 6973 6974 /* support for COO values insertion */ 6975 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6976 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6977 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6978 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6979 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6980 PetscMemType mtype; 6981 6982 /* customization */ 6983 PetscBool abmerge; 6984 PetscBool P_oth_bind; 6985 } MatMatMPIAIJBACKEND; 6986 6987 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6988 { 6989 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6990 PetscInt i; 6991 6992 PetscFunctionBegin; 6993 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6994 PetscCall(PetscFree(mmdata->bufa)); 6995 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6996 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6997 PetscCall(MatDestroy(&mmdata->P_oth)); 6998 PetscCall(MatDestroy(&mmdata->Bloc)); 6999 PetscCall(PetscSFDestroy(&mmdata->sf)); 7000 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7001 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7002 PetscCall(PetscFree(mmdata->own[0])); 7003 PetscCall(PetscFree(mmdata->own)); 7004 PetscCall(PetscFree(mmdata->off[0])); 7005 PetscCall(PetscFree(mmdata->off)); 7006 PetscCall(PetscFree(mmdata)); 7007 PetscFunctionReturn(PETSC_SUCCESS); 7008 } 7009 7010 /* Copy selected n entries with indices in idx[] of A to v[]. 7011 If idx is NULL, copy the whole data array of A to v[] 7012 */ 7013 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7014 { 7015 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7016 7017 PetscFunctionBegin; 7018 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7019 if (f) { 7020 PetscCall((*f)(A, n, idx, v)); 7021 } else { 7022 const PetscScalar *vv; 7023 7024 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7025 if (n && idx) { 7026 PetscScalar *w = v; 7027 const PetscInt *oi = idx; 7028 PetscInt j; 7029 7030 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7031 } else { 7032 PetscCall(PetscArraycpy(v, vv, n)); 7033 } 7034 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7035 } 7036 PetscFunctionReturn(PETSC_SUCCESS); 7037 } 7038 7039 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7040 { 7041 MatMatMPIAIJBACKEND *mmdata; 7042 PetscInt i, n_d, n_o; 7043 7044 PetscFunctionBegin; 7045 MatCheckProduct(C, 1); 7046 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7047 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7048 if (!mmdata->reusesym) { /* update temporary matrices */ 7049 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7050 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7051 } 7052 mmdata->reusesym = PETSC_FALSE; 7053 7054 for (i = 0; i < mmdata->cp; i++) { 7055 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7056 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7057 } 7058 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7059 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7060 7061 if (mmdata->mptmp[i]) continue; 7062 if (noff) { 7063 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7064 7065 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7066 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* off-process insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
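/*
   The COO insertion pattern used above, in isolation (illustrative sketch): after a
   one-time symbolic setup with MatSetPreallocationCOO(), each numeric phase only has to
   refill the value array in the same order and call MatSetValuesCOO(), which is what
   makes repeated products with the same sparsity pattern cheap:

     PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); // symbolic, done once
     // ... each numeric phase: recompute coo_v[0..ncoo-1] ...
     PetscCall(MatSetValuesCOO(C, coo_v, INSERT_VALUES));
*/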
/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]:
                                                                                             type-0: consecutive, start from 0; type-1: consecutive with
                                                                                             a base offset; type-2: sparse with a local to global map table */
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
  MatProductType          ptype;
  PetscBool               mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt             size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
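  /*
     Decomposition recap (illustrative, matching the cases above): with A = [A_d | A_o]
     and P = [P_d | P_o] the per-process diagonal/off-diagonal splits of MATMPIAIJ, the
     backend assembles C from purely local SeqAIJ products. For example, for C = A*P the
     local rows of C are

       C_loc = A_d * P_loc + A_o * P_oth

     where P_loc is either P_d and P_o kept split or merged into one local matrix
     (-matmatmult_backend_mergeB), and P_oth holds the remote rows of P matching the
     nonzero columns of A_o, fetched with MatGetBrowsOfAoCols_MPIAIJ(). Each term becomes
     one entry of mp[], and rmapt/cmapt record how that term's local row/column indices
     translate to global indices of C.
  */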
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].

    off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
    so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
      Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;
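  /*
     Communication-pattern note (illustrative): PetscSFSetGraphLayout() above builds a star
     forest whose leaves are this rank's off-process COO entries and whose roots follow the
     row layout of C; the multi-SF then reports how many remote contributions (ncoo2) land
     on this rank. The same SF is reused in MatProductNumeric_MPIAIJBACKEND() to move the
     scalar values with PetscSFGatherBegin/End().
  */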
  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
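/*
   Caller-side sketch (illustrative): the symbolic/numeric split above is what a user
   triggers indirectly through the MatProduct API, e.g. for C = P^T A P on device
   matrices such as MATMPIAIJCUSPARSE or MATMPIAIJKOKKOS:

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C)); // may select MatProductSymbolic_MPIAIJBACKEND
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));        // can be repeated as values of A or P change
     PetscCall(MatDestroy(&C));
*/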
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fall back to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
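/*
   Command-line sketch (illustrative): forcing the CPU fallback for a user-level
   MatPtAP() on device matrices, while merging product->B for MatMatMult():

     ./app -mat_type aijcusparse -matptap_backend_cpu -matmatmult_backend_mergeB

   When the product is created through the MatProduct API instead (api_user is false),
   the corresponding key is -mat_product_algorithm_backend_cpu, possibly prepended by
   the options prefix of the product matrix.
*/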
/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n  - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed  - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}
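/*
   Worked example (illustrative): with bs = 2 and a row whose column indices are
   {0, 1, 4, 5, 7}, MatCollapseRow() produces the block columns {0, 2, 3}
   (indices divided by bs with duplicates dropped; the input must be sorted, which
   MatGetRow() guarantees for AIJ). MatCollapseRows() then merges the bs consecutive
   rows of one block row with PetscMergeIntArray(), ping-ponging between the
   w0/w1/w2 work arrays to avoid reallocations.
*/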
/*
  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix

  Input Parameters:
+ Amat       - matrix
. symmetrize - make the result symmetric
. scale      - scale with diagonal
- filter     - drop entries whose absolute value is below this threshold (ignored if negative)

  Output Parameter:
. a_Gmat - output scalar graph >= 0
*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs;

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;
        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0;
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0;
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
        Determine the preallocation needed for the scalar matrix derived from the blocked (vector) matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
          Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
          Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a = d->A;
      b = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
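  /*
     Note (illustrative): the branch below forms G <- G + G^T with MatAXPY() whenever
     symmetry is requested but not already known to hold, so the graph handed to the
     coarsening algorithms is symmetric regardless of the structure of the operator.
  */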
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale Gmat so that all diagonal values are +1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
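/*
   Usage sketch (illustrative; this helper is PETSC_INTERN, so callers live inside
   PETSc, e.g. multigrid setup code such as GAMG): build a symmetrized, diagonally
   scaled scalar graph from a blocked operator A, dropping entries below 0.01 in
   absolute value:

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.01, &G));
     // ... coarsening / aggregation on G ...
     PetscCall(MatDestroy(&G));
*/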
/*
   Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* The variable below is only used in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
/* Undefining these here since they were redefined from their original definitions above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ