#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
   Destroy an MPIAIJ matrix: releases the diagonal (A) and off-diagonal (B) sequential
   blocks, the global-to-local column map, the ghost-value communication objects, and
   unregisters every composed method so dangling function pointers cannot survive the type.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is either a hash map or a dense array depending on the configure option */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
   Builds the merged (diagonal + off-diagonal) local sequential matrix and returns its
   CSR pointers. The merged matrix is composed on A under the key "MatGetRowIJ_MPIAIJ"
   so MatRestoreRowIJ_MPIAIJ() can find it again; the composition also holds a reference,
   which is why the MatDestroy() below does not actually free it.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B)); /* the composition above keeps B alive until restore */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Releases the CSR pointers obtained from MatGetRowIJ_MPIAIJ() and drops the cached
   merged local matrix (composing NULL removes the last reference).
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Propagate a CPU-binding request to the sequential sub-matrices and the local work vectors */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward block sizes to the diagonal block; the off-diagonal block is given a column
   block size of 1 (NOTE(review): presumably because B's columns are the compressed ghost
   columns, which carry no block structure — confirm against MatCreateColmap usage) */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Returns an IS of the locally owned rows that contain at least one stored nonzero value
   (in either the diagonal or off-diagonal block). If no process has an all-zero row,
   *keptrows is left NULL, meaning "keep everything".
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows whose stored entries are all zero (or that store nothing) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero; not counted */
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* global count of zero rows; if none anywhere, every rank keeps all rows (NULL IS) */
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: collect the global indices of the rows being kept */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the diagonal of Y; the fast path writes straight into the diagonal block when the
   row and column layouts are congruent and Y is already assembled */
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an IS of the locally owned rows whose diagonal entry is zero (indices are global) */
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  /* diagonal entries live in the diagonal block A; indices come back local */
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* shift to global numbering */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Compute a per-column reduction (1/2/inf norm, sum or mean of real/imaginary parts)
   over the whole parallel matrix. Each rank accumulates its contributions into a
   length-n work array indexed by global column, then the ranks combine with an
   Allreduce (MAX for the inf-norm, SUM otherwise).
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore with no use in between — NOTE(review): presumably forces device data
     to be synced to the host before a->a/b->a are read directly below; confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* diagonal-block columns are offset by cmap->rstart; off-diagonal columns map through garray */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Build an IS of rows that have an entry outside the block diagonal: the union of
   off-block-diagonal rows of the diagonal block A and any nonzero row of the
   off-diagonal block B (B's entries are by definition off the block diagonal).
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis
                       + nsis, &iis));
  /* merge the two index lists, then sort and deduplicate in place */
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local row numbers -> global */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array, but it is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
  /* values are stored shifted by +1 so that 0 (hash-miss / calloc default) means "column not present" */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Insert or add a single value at (row,col) of the diagonal block A.
   Searches the (sorted) row with a coarse bisection followed by a linear scan;
   on a miss it either skips (zero value with ignorezeroentries, or nonew == 1),
   errors (nonew == -1), or reallocates and shifts later entries up to make room.
   Relies on the local variables declared in MatSetValues_MPIAIJ().
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B
   (note: no "row != col" exemption in the zero-skip test, since B holds no
   diagonal entries). Relies on the local variables in MatSetValues_MPIAIJ().
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
   Overwrite the values of one locally owned row, given the full row of values v
   in global column order: [left-of-diagonal-block B part | diagonal A part |
   right-of-diagonal-block B part]. The sparsity pattern must already exist.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* global row -> local row */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* first B column past the diagonal block */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatSetValues for MPIAIJ. Locally owned rows are routed to the diagonal block A
   (columns in [cstart,cend)) or the off-diagonal block B (all other columns, mapped
   through colmap when the matrix was previously assembled); off-process rows are
   buffered in the stash for communication during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash for exchange during MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */ 623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 624 { 625 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 626 Mat A = aij->A; /* diagonal part of the matrix */ 627 Mat B = aij->B; /* off-diagonal part of the matrix */ 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 629 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 630 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 631 PetscInt *ailen = a->ilen, *aj = a->j; 632 PetscInt *bilen = b->ilen, *bj = b->j; 633 PetscInt am = aij->A->rmap->n, j; 634 PetscInt diag_so_far = 0, dnz; 635 PetscInt offd_so_far = 0, onz; 636 637 PetscFunctionBegin; 638 /* Iterate over all rows of the matrix */ 639 for (j = 0; j < am; j++) { 640 dnz = onz = 0; 641 /* Iterate over all non-zero columns of the current row */ 642 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 643 /* If column is in the diagonal */ 644 if (mat_j[col] >= cstart && mat_j[col] < cend) { 645 aj[diag_so_far++] = mat_j[col] - cstart; 646 dnz++; 647 } else { /* off-diagonal entries */ 648 bj[offd_so_far++] = mat_j[col]; 649 onz++; 650 } 651 } 652 ailen[j] = dnz; 653 bilen[j] = onz; 654 } 655 PetscFunctionReturn(PETSC_SUCCESS); 656 } 657 658 /* 659 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 660 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 661 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 662 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 663 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
664 */ 665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 666 { 667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 668 Mat A = aij->A; /* diagonal part of the matrix */ 669 Mat B = aij->B; /* off-diagonal part of the matrix */ 670 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 672 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 673 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 674 PetscInt *ailen = a->ilen, *aj = a->j; 675 PetscInt *bilen = b->ilen, *bj = b->j; 676 PetscInt am = aij->A->rmap->n, j; 677 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. 
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* collective decision: MPI_LAND of was_assembled across the communicator */
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build colmap/garray/lvec/Mvctx used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* row-access scratch space is invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all stored values (pattern is kept) in both blocks */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given global rows (possibly owned by other ranks), optionally placing
   diag on the diagonal and fixing b so that x stays a solution for those rows. */
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const
PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i on zeroed rows keeps x a solution there */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* square/congruent case: diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save nonew flags; they are restored after the diagonal insertions below */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] +
A->rmap->rstart;
      /* rows past the global column count have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero both the given global rows AND the matching columns, optionally placing
   diag on the diagonal and adjusting b for the eliminated column contributions. */
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 == "row not zeroed" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this
row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over ghost columns marking columns that must be zeroed */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring ghost values of x local so eliminated columns can be folded into b */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++)
    PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* note: reuses n (was A->rmap->n) as the row length */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* fold the eliminated column contribution into b before zeroing */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A*xx; overlaps the ghost-value scatter with the diagonal-block multiply */
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* start communication, compute the local part, then finish communication:
     the scatter/compute overlap hides communication latency */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* xx = D^{-1}-type action of the diagonal block only (delegates to a->A) */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A*xx, with the same scatter/compute overlap as MatMult */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T*xx; local transposes first, then a reverse scatter accumulates
   the off-diagonal contributions onto their owning ranks */
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Test whether Bmat equals Amat^T to tolerance tol (collective result in *f) */
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia =
Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global column indices outside this rank's owned range */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* compare A(Me, Notme) against B(Notme, Me) */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T*xx; same structure as MatMultTranspose with an add */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial
results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale every stored entry of both blocks by aa */
static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write the parallel matrix to a binary viewer: header, per-row lengths, then
   column indices and values with each row's entries in ascending global column order
   (off-diagonal entries below the diagonal block first, then diagonal, then the rest). */
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray; /* maps local off-diag columns to global */
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    /* clamp the global nonzero count when it does not fit in PetscInt */
    if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diagonal entries with global column < cs come first (B->j is sorted) */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* then the diagonal block, shifted to global indices */
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* then the remaining off-diagonal entries */
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same order as the column indices */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* Dispatch viewing of a parallel AIJ matrix for ASCII, draw, binary, and socket
   viewers; falls through to gathering the whole matrix on rank 0 when needed. */
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      /* synchronized printing so each rank's lines appear in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %"
PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Public view entry point: route to the combined handler for supported viewers */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel (processor-local) SOR: each sweep refreshes ghost values of xx, folds the
   off-diagonal contribution into the right-hand side, then runs a local SeqAIJ sweep. */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag,
fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (the modified rhs) is only needed when more than a single zero-guess sweep runs;
     note ~flag & SOR_ZERO_INITIAL_GUESS tests that the zero-guess bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep can use bb directly since xx contributes nothing yet */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily build and cache the diagonal used by the Eisenstat trick */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build B = P_row * A * P_col^T for row/column permutations rowp/colp, using
   star forests to invert the permutations and redistribute counts and entries. */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
PetscSF   rowsf, sf;
  IS        parcolp = NULL;
  PetscBool done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 
1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 
case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 
1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if (idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 PetscMPIInt iN; 1804 1805 PetscFunctionBegin; 1806 if (aij->size == 1) { 1807 PetscCall(MatNorm(aij->A, type, norm)); 1808 } else { 1809 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1810 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1811 if (type == NORM_FROBENIUS) { 1812 v = amata; 1813 for (i = 0; i < amat->nz; i++) { 1814 sum += PetscRealPart(PetscConj(*v) * (*v)); 1815 v++; 1816 } 1817 v = bmata; 1818 for (i = 0; i < bmat->nz; i++) { 1819 sum += PetscRealPart(PetscConj(*v) * (*v)); 1820 v++; 1821 } 1822 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1823 *norm = PetscSqrtReal(*norm); 1824 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1825 } else if (type == NORM_1) { /* max column norm */ 1826 PetscReal *tmp, *tmp2; 1827 PetscInt *jj, *garray = aij->garray; 1828 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1829 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1830 *norm = 0.0; 1831 v = amata; 1832 jj = amat->j; 1833 for (j = 0; j < amat->nz; j++) { 1834 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1835 v++; 1836 } 1837 v = bmata; 1838 jj = bmat->j; 1839 for (j = 0; j < bmat->nz; 
j++) { 1840 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1841 v++; 1842 } 1843 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1844 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1845 for (j = 0; j < mat->cmap->N; j++) { 1846 if (tmp2[j] > *norm) *norm = tmp2[j]; 1847 } 1848 PetscCall(PetscFree(tmp)); 1849 PetscCall(PetscFree(tmp2)); 1850 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1851 } else if (type == NORM_INFINITY) { /* max row norm */ 1852 PetscReal ntemp = 0.0; 1853 for (j = 0; j < aij->A->rmap->n; j++) { 1854 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1855 sum = 0.0; 1856 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1857 sum += PetscAbsScalar(*v); 1858 v++; 1859 } 1860 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1861 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1862 sum += PetscAbsScalar(*v); 1863 v++; 1864 } 1865 if (sum > ntemp) ntemp = sum; 1866 } 1867 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1868 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1869 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1870 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1871 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1872 } 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 1876 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1877 { 1878 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1879 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1880 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1881 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1882 Mat B, A_diag, *B_diag; 1883 const MatScalar *pbv, *bv; 1884 1885 PetscFunctionBegin; 1886 if (reuse == MAT_REUSE_MATRIX) 
PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1887 ma = A->rmap->n; 1888 na = A->cmap->n; 1889 mb = a->B->rmap->n; 1890 nb = a->B->cmap->n; 1891 ai = Aloc->i; 1892 aj = Aloc->j; 1893 bi = Bloc->i; 1894 bj = Bloc->j; 1895 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1896 PetscInt *d_nnz, *g_nnz, *o_nnz; 1897 PetscSFNode *oloc; 1898 PETSC_UNUSED PetscSF sf; 1899 1900 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1901 /* compute d_nnz for preallocation */ 1902 PetscCall(PetscArrayzero(d_nnz, na)); 1903 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1904 /* compute local off-diagonal contributions */ 1905 PetscCall(PetscArrayzero(g_nnz, nb)); 1906 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1907 /* map those to global */ 1908 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1909 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1910 PetscCall(PetscSFSetFromOptions(sf)); 1911 PetscCall(PetscArrayzero(o_nnz, na)); 1912 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1913 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1914 PetscCall(PetscSFDestroy(&sf)); 1915 1916 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1917 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1918 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1919 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1920 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1921 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1922 } else { 1923 B = *matout; 1924 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1925 } 1926 1927 b = (Mat_MPIAIJ *)B->data; 1928 A_diag = a->A; 1929 B_diag = &b->A; 1930 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1931 A_diag_ncol = A_diag->cmap->N; 1932 B_diag_ilen = sub_B_diag->ilen; 1933 B_diag_i = sub_B_diag->i; 1934 1935 /* Set ilen for diagonal of B */ 1936 for (i 
= 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1937 1938 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1939 very quickly (=without using MatSetValues), because all writes are local. */ 1940 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1941 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1942 1943 /* copy over the B part */ 1944 PetscCall(PetscMalloc1(bi[mb], &cols)); 1945 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1946 pbv = bv; 1947 row = A->rmap->rstart; 1948 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1949 cols_tmp = cols; 1950 for (i = 0; i < mb; i++) { 1951 ncol = bi[i + 1] - bi[i]; 1952 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1953 row++; 1954 if (pbv) pbv += ncol; 1955 if (cols_tmp) cols_tmp += ncol; 1956 } 1957 PetscCall(PetscFree(cols)); 1958 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1959 1960 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1961 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1962 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1963 *matout = B; 1964 } else { 1965 PetscCall(MatHeaderMerge(A, &B)); 1966 } 1967 PetscFunctionReturn(PETSC_SUCCESS); 1968 } 1969 1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1971 { 1972 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1973 Mat a = aij->A, b = aij->B; 1974 PetscInt s1, s2, s3; 1975 1976 PetscFunctionBegin; 1977 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1978 if (rr) { 1979 PetscCall(VecGetLocalSize(rr, &s1)); 1980 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1981 /* Overlap communication with computation. 
*/ 1982 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1983 } 1984 if (ll) { 1985 PetscCall(VecGetLocalSize(ll, &s1)); 1986 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1987 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1988 } 1989 /* scale the diagonal block */ 1990 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1991 1992 if (rr) { 1993 /* Do a scatter end and then right scale the off-diagonal block */ 1994 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1995 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1996 } 1997 PetscFunctionReturn(PETSC_SUCCESS); 1998 } 1999 2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2001 { 2002 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2003 2004 PetscFunctionBegin; 2005 PetscCall(MatSetUnfactored(a->A)); 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2010 { 2011 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2012 Mat a, b, c, d; 2013 PetscBool flg; 2014 2015 PetscFunctionBegin; 2016 a = matA->A; 2017 b = matA->B; 2018 c = matB->A; 2019 d = matB->B; 2020 2021 PetscCall(MatEqual(a, c, &flg)); 2022 if (flg) PetscCall(MatEqual(b, d, &flg)); 2023 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2024 PetscFunctionReturn(PETSC_SUCCESS); 2025 } 2026 2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2030 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2031 2032 PetscFunctionBegin; 2033 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2034 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2035 /* because of the column compression in the off-processor part of the matrix a->B, 2036 the number of columns in a->B and b->B may be different, hence we cannot call 2037 the MatCopy() directly on the two parts. If need be, we can provide a more 2038 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2039 then copying the submatrices */ 2040 PetscCall(MatCopy_Basic(A, B, str)); 2041 } else { 2042 PetscCall(MatCopy(a->A, b->A, str)); 2043 PetscCall(MatCopy(a->B, b->B, str)); 2044 } 2045 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 /* 2050 Computes the number of nonzeros per row needed for preallocation when X and Y 2051 have different nonzero structure. 2052 */ 2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2054 { 2055 PetscInt i, j, k, nzx, nzy; 2056 2057 PetscFunctionBegin; 2058 /* Set the number of nonzeros in the new matrix */ 2059 for (i = 0; i < m; i++) { 2060 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2061 nzx = xi[i + 1] - xi[i]; 2062 nzy = yi[i + 1] - yi[i]; 2063 nnz[i] = 0; 2064 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2065 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2066 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2067 nnz[i]++; 2068 } 2069 for (; k < nzy; k++) nnz[i]++; 2070 } 2071 PetscFunctionReturn(PETSC_SUCCESS); 2072 } 2073 2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2076 { 
2077 PetscInt m = Y->rmap->N; 2078 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2079 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2080 2081 PetscFunctionBegin; 2082 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2087 { 2088 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2089 2090 PetscFunctionBegin; 2091 if (str == SAME_NONZERO_PATTERN) { 2092 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2093 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2094 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2095 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2096 } else { 2097 Mat B; 2098 PetscInt *nnz_d, *nnz_o; 2099 2100 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2101 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2102 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2103 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2104 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2105 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2106 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2107 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2108 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2109 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2110 PetscCall(MatHeaderMerge(Y, &B)); 2111 PetscCall(PetscFree(nnz_d)); 2112 PetscCall(PetscFree(nnz_o)); 2113 } 2114 PetscFunctionReturn(PETSC_SUCCESS); 2115 } 2116 2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2118 2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2120 { 2121 PetscFunctionBegin; 2122 if (PetscDefined(USE_COMPLEX)) { 2123 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2124 2125 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2126 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2127 } 2128 
PetscFunctionReturn(PETSC_SUCCESS); 2129 } 2130 2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2132 { 2133 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2134 2135 PetscFunctionBegin; 2136 PetscCall(MatRealPart(a->A)); 2137 PetscCall(MatRealPart(a->B)); 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCall(MatImaginaryPart(a->A)); 2147 PetscCall(MatImaginaryPart(a->B)); 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2154 PetscInt i, *idxb = NULL, m = A->rmap->n; 2155 PetscScalar *va, *vv; 2156 Vec vB, vA; 2157 const PetscScalar *vb; 2158 2159 PetscFunctionBegin; 2160 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2161 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2162 2163 PetscCall(VecGetArrayWrite(vA, &va)); 2164 if (idx) { 2165 for (i = 0; i < m; i++) { 2166 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2167 } 2168 } 2169 2170 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2171 PetscCall(PetscMalloc1(m, &idxb)); 2172 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2173 2174 PetscCall(VecGetArrayWrite(v, &vv)); 2175 PetscCall(VecGetArrayRead(vB, &vb)); 2176 for (i = 0; i < m; i++) { 2177 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2178 vv[i] = vb[i]; 2179 if (idx) idx[i] = a->garray[idxb[i]]; 2180 } else { 2181 vv[i] = va[i]; 2182 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2183 } 2184 } 2185 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2186 PetscCall(VecRestoreArrayWrite(vA, &va)); 2187 PetscCall(VecRestoreArrayRead(vB, &vb)); 2188 PetscCall(PetscFree(idxb)); 2189 PetscCall(VecDestroy(&vA)); 2190 PetscCall(VecDestroy(&vB)); 2191 PetscFunctionReturn(PETSC_SUCCESS); 2192 } 2193 2194 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2195 { 2196 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2197 Vec vB, vA; 2198 2199 PetscFunctionBegin; 2200 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2201 PetscCall(MatGetRowSumAbs(a->A, vA)); 2202 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2203 PetscCall(MatGetRowSumAbs(a->B, vB)); 2204 PetscCall(VecAXPY(vA, 1.0, vB)); 2205 PetscCall(VecDestroy(&vB)); 2206 PetscCall(VecCopy(vA, v)); 2207 PetscCall(VecDestroy(&vA)); 2208 PetscFunctionReturn(PETSC_SUCCESS); 2209 } 2210 2211 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2212 { 2213 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2214 PetscInt m = A->rmap->n, n = A->cmap->n; 2215 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2216 PetscInt *cmap = mat->garray; 2217 PetscInt *diagIdx, *offdiagIdx; 2218 Vec diagV, offdiagV; 2219 PetscScalar *a, *diagA, *offdiagA; 2220 const PetscScalar *ba, *bav; 2221 PetscInt r, j, col, ncols, *bi, *bj; 2222 Mat B = mat->B; 2223 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2224 2225 PetscFunctionBegin; 2226 /* When a process holds entire A and other processes have no entry */ 2227 if (A->cmap->N == n) { 2228 PetscCall(VecGetArrayWrite(v, &diagA)); 2229 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2230 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2231 PetscCall(VecDestroy(&diagV)); 2232 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2233 PetscFunctionReturn(PETSC_SUCCESS); 2234 } else if (n == 0) { 2235 if (m) { 2236 PetscCall(VecGetArrayWrite(v, &a)); 2237 for (r = 0; r < m; r++) { 2238 a[r] = 0.0; 2239 if (idx) idx[r] = -1; 2240 } 2241 PetscCall(VecRestoreArrayWrite(v, &a)); 2242 } 2243 PetscFunctionReturn(PETSC_SUCCESS); 2244 } 2245 2246 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get 
offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r + 1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; 2261 offdiagIdx[r] = cmap[0]; 2262 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2263 offdiagA[r] = 0.0; 2264 2265 /* Find first hole in the cmap */ 2266 for (j = 0; j < ncols; j++) { 2267 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2268 if (col > j && j < cstart) { 2269 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2270 break; 2271 } else if (col > j + n && j >= cstart) { 2272 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2273 break; 2274 } 2275 } 2276 if (j == ncols && ncols < A->cmap->N - n) { 2277 /* a hole is outside compressed Bcols */ 2278 if (ncols == 0) { 2279 if (cstart) { 2280 offdiagIdx[r] = 0; 2281 } else offdiagIdx[r] = cend; 2282 } else { /* ncols > 0 */ 2283 offdiagIdx[r] = cmap[ncols - 1] + 1; 2284 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2285 } 2286 } 2287 } 2288 2289 for (j = 0; j < ncols; j++) { 2290 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2291 offdiagA[r] = *ba; 2292 offdiagIdx[r] = cmap[*bj]; 2293 } 2294 ba++; 2295 bj++; 2296 } 2297 } 2298 2299 PetscCall(VecGetArrayWrite(v, &a)); 2300 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2301 for (r = 0; r < m; ++r) { 2302 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2303 a[r] = diagA[r]; 2304 if (idx) idx[r] = cstart + diagIdx[r]; 2305 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 if (idx) { 2308 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2309 idx[r] = cstart + diagIdx[r]; 2310 } else idx[r] = offdiagIdx[r]; 2311 } 2312 } else { 2313 a[r] = offdiagA[r]; 2314 if (idx) 
idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise minimum (by real part) over the locally owned rows of an MPIAIJ matrix.
   Combines the row minima of the diagonal block mat->A with those of the off-diagonal
   block mat->B, accounting for implicit zeros in compressed B rows; optionally returns
   in idx[] the global column index where each row's minimum occurs. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps local B columns to global column numbers */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate directly to the sequential diagonal block */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: every local row is empty, so report the identity for "min" */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* calloc (not malloc) so index arrays start zeroed */
  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower (an implicit zero exists) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Sweep the stored B entries of this row, keeping the smallest value (ba/bj advance together) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block minima; ties go to the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise maximum (by real part) over the locally owned rows of an MPIAIJ matrix;
   mirror image of MatGetRowMin_MPIAIJ() above. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps local B columns to global column numbers */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate directly to the sequential diagonal block */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: every local row is empty, so report the identity for "max" */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Sweep the stored B entries of this row, keeping the largest value (ba/bj advance together) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block maxima; ties go to the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather the nonzero structure (no values) of the parallel matrix into a sequential matrix.
   Ownership of the result created by MatCreateSubMatrix_MPIAIJ_All() is transferred to *newmat;
   only the holder array is freed here. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy)); /* free only the Mat* holder, not the matrix itself */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the (point-block) diagonal of the local diagonal block a->A; the block diagonal
   of an MPIAIJ matrix lives entirely in A, so this is a pure delegation. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype; /* propagate any singular-block error flag */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix with random values. For an unassembled (preallocated-only) matrix the
   off-diagonal block must not receive entries in the local diagonal column range. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* B is still in uncompressed (global-column) form; skip the diagonal-block column range */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Method backing MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the increaseoverlap
   function pointer between the scalable and default implementations. */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* total = nonzeros of the diagonal block plus those of the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op for matrix types that do not compose the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process MPIAIJ-specific runtime options (currently only -mat_increase_overlap_scalable). */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed increaseoverlap implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I. Ensures a minimal (one nonzero per row) preallocation exists before
   delegating to MatShift_Basic(). */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the new-nonzero policy clobbered by re-preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any locally owned diagonal entry is structurally missing; the diagonal of a
   square MPIAIJ matrix lives in a->A, whose local index is shifted to a global row index. */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert variable-sized diagonal blocks; all such blocks lie in the local diagonal part a->A. */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Remove explicitly stored zero entries from both local blocks. */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPIAIJ. Entries are positional (indices in the comments refer to
   slots of struct _MatOps); NULL means the operation is unsupported or provided elsewhere
   (e.g. composed at creation or by MatConvert). Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL};

/* Stash a copy of the current values of both local blocks (see MatStoreValues()). */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously saved by MatStoreValues_MPIAIJ(). */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Method backing MatMPIAIJSetPreallocation(): (re)creates the sequential diagonal block b->A
   and off-diagonal block b->B with the requested per-row nonzero estimates, discarding any
   previous column map, ghost vector, and scatter context. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode: restore the cached op table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* drop state derived from any previous assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* off-diagonal block: uncompressed (full global width) until assembly; width 0 when uniprocessor */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* diagonal block: local rows x local columns */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Method backing MatResetPreallocation(): keeps the existing preallocation pattern of the
   local blocks but resets assembly state and drops the assembly-derived communication data. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIAIJ matrix, copying layout, options, and (per cpvalues) values; the scatter
   context is shared by reference rather than copied. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix MatGetRow() scratch is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 so a zero-length garray is still a valid allocation */
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* share the scatter context by reference; it is immutable after assembly */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIAIJ matrix from a viewer; dispatches to the binary or HDF5 reader. */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read an MPIAIJ matrix from PETSc's binary format: a 4-entry header (classid, M, N, nz),
   then row lengths, column indices, and values, distributed across ranks by the matrix's
   row layout. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* nz < 0 marks a special (e.g. dense) on-disk format this reader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix sum converts lengths to CSR offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) { /* PETSC_INT_MAX means nz was not recorded; skip consistency check */
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all-columns only if every rank's local part matches its ownership range */
  PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
. 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0)); /* -1 marks columns NOT selected by iscol */
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: prefix sum of local iscol sizes gives this rank's offset in the submatrix columns */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* global -> local row index */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* this ghost column was selected by some rank's iscol */
      idx[count]   = i;                                         /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);     /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 transfers to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed during the initial call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M (takes ownership of Asub and Bsub) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* merge-walk subgarray (condensed) against garray (original) to select surviving entries */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Top-level MatCreateSubMatrix() implementation for MPIAIJ: detects whether isrow/iscol share
   the matrix's processor distribution and dispatches to the cheapest extraction path. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* composed objects from the initial call reveal which path was taken */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1]
= PETSC_TRUE; 3392 } else { 3393 PetscCall(ISGetMinMax(iscol, &i, &j)); 3394 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3395 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3396 } 3397 3398 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3399 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3400 sameRowDist = tsameDist[0]; 3401 } 3402 3403 if (sameRowDist) { 3404 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3405 /* isrow and iscol have same processor distribution as mat */ 3406 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3407 PetscFunctionReturn(PETSC_SUCCESS); 3408 } else { /* sameRowDist */ 3409 /* isrow has same processor distribution as mat */ 3410 if (call == MAT_INITIAL_MATRIX) { 3411 PetscBool sorted; 3412 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3413 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3414 PetscCall(ISGetSize(iscol, &i)); 3415 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3416 3417 PetscCall(ISSorted(iscol_local, &sorted)); 3418 if (sorted) { 3419 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3420 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3421 PetscFunctionReturn(PETSC_SUCCESS); 3422 } 3423 } else { /* call == MAT_REUSE_MATRIX */ 3424 IS iscol_sub; 3425 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3426 if (iscol_sub) { 3427 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3428 PetscFunctionReturn(PETSC_SUCCESS); 3429 } 3430 } 3431 } 3432 } 3433 3434 /* General case: iscol -> iscol_local which has global size of iscol */ 3435 if (call == MAT_REUSE_MATRIX) { 3436 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3437 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3438 } else { 3439 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3440 } 3441 3442 PetscCall(ISGetLocalSize(iscol, &csize)); 3443 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3444 3445 if (call == MAT_INITIAL_MATRIX) { 3446 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3447 PetscCall(ISDestroy(&iscol_local)); 3448 } 3449 PetscFunctionReturn(PETSC_SUCCESS); 3450 } 3451 3452 /*@C 3453 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3454 and "off-diagonal" part of the matrix in CSR format. 3455 3456 Collective 3457 3458 Input Parameters: 3459 + comm - MPI communicator 3460 . A - "diagonal" portion of matrix 3461 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3462 - garray - global index of `B` columns 3463 3464 Output Parameter: 3465 . mat - the matrix, with input `A` as its local diagonal matrix 3466 3467 Level: advanced 3468 3469 Notes: 3470 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3471 3472 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Translate B's local (compressed) column indices to global indices via garray,
     in place in B's own j array */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand array ownership from B to Bnew: B must not free the shared i/j/a on destroy */
  b->free_a  = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->free_a  = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - extract a parallel submatrix when isrow has the same
  processor distribution as mat. iscol_local must be sorted (duplicate indices allowed) and is
  only consulted on the MAT_INITIAL_MATRIX call; pass NULL on reuse (the needed index sets are
  retrieved from objects composed on *newmat).
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the index sets and sequential submatrix stashed by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* keep only requested columns that this rank actually stores, i.e. columns in the
         diagonal ownership range or present in the off-diagonal garray (both sorted) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts yields this rank's column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* map Msub's local column indices back to submatrix global columns */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* local sequential copy was stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of local column counts gives this rank's column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMPIAIJSetPreallocationCSR_MPIAIJ - implementation backing MatMPIAIJSetPreallocationCSR():
  preallocates from the local CSR arrays (Ii, J), inserts the values, assembles, and records
  the below-block-diagonal counts in Aij->ld.
*/
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii need not start at 0; offsets into J/v are taken relative to Ii[0] */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* sanity-check each row: nonnegative length, column indices within [0, N) */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block vs off-diagonal-block entries */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* all inserted entries are locally owned, so assembly needs no off-process communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  /* NOTE(review): assumes each row's columns are sorted so the scan stops at the first
     in-diagonal column, and that J is non-NULL whenever some row is nonempty — confirm */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse
  parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ for MATMPIAIJ);
     a no-op for types that do not compose the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation composed as "MatMPIAIJSetPreallocation_C";
     a no-op for matrix types that do not provide it */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4156 4157 Collective 4158 4159 Input Parameters: 4160 + comm - MPI communicator 4161 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4162 . n - This value should be the same as the local size used in creating the 4163 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4164 calculated if `N` is given) For square matrices n is almost always `m`. 4165 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4166 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4167 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4168 . j - global column indices 4169 - a - optional matrix values 4170 4171 Output Parameter: 4172 . mat - the matrix 4173 4174 Level: intermediate 4175 4176 Notes: 4177 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4178 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4179 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4180 4181 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4182 4183 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4184 4185 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4186 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
4187 4188 The format which is used for the sparse matrix input, is equivalent to a 4189 row-major ordering, i.e., for the following matrix, the input data expected is 4190 as shown 4191 .vb 4192 1 0 0 4193 2 0 3 P0 4194 ------- 4195 4 5 6 P1 4196 4197 Process0 [P0] rows_owned=[0,1] 4198 i = {0,1,3} [size = nrow+1 = 2+1] 4199 j = {0,0,2} [size = 3] 4200 v = {1,2,3} [size = 3] 4201 4202 Process1 [P1] rows_owned=[2] 4203 i = {0,3} [size = nrow+1 = 1+1] 4204 j = {0,1,2} [size = 3] 4205 v = {4,5,6} [size = 3] 4206 .ve 4207 4208 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4209 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4210 @*/ 4211 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4212 { 4213 PetscFunctionBegin; 4214 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4215 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4216 PetscCall(MatCreate(comm, mat)); 4217 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4218 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4219 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4220 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4221 PetscFunctionReturn(PETSC_SUCCESS); 4222 } 4223 4224 /*@ 4225 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4226 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4227 from `MatCreateMPIAIJWithArrays()` 4228 4229 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4230 4231 Collective 4232 4233 Input Parameters: 4234 + mat - the matrix 4235 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4236 . n - This value should be the same as the local size used in creating the 4237 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4238 calculated if N is given) For square matrices n is almost always m. 4239 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4240 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4241 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4242 . J - column indices 4243 - v - matrix values 4244 4245 Level: deprecated 4246 4247 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4248 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4249 @*/ 4250 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4251 { 4252 PetscInt nnz, i; 4253 PetscBool nooffprocentries; 4254 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4255 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4256 PetscScalar *ad, *ao; 4257 PetscInt ldi, Iii, md; 4258 const PetscInt *Adi = Ad->i; 4259 PetscInt *ld = Aij->ld; 4260 4261 PetscFunctionBegin; 4262 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4263 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4264 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4265 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4266 4267 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4268 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4269 4270 for (i = 0; i < m; i++) { 4271 if (PetscDefined(USE_DEBUG)) { 4272 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4273 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4274 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4275 } 4276 } 4277 nnz = Ii[i + 1] - Ii[i]; 4278 Iii = Ii[i]; 4279 ldi = ld[i]; 4280 md = Adi[i + 1] - Adi[i]; 4281 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4282 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4283 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4284 ad += md; 4285 ao += nnz - md; 4286 } 4287 nooffprocentries = mat->nooffprocentries; 4288 mat->nooffprocentries = PETSC_TRUE; 4289 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4290 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4291 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4292 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4293 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4294 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4295 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4296 mat->nooffprocentries = nooffprocentries; 4297 PetscFunctionReturn(PETSC_SUCCESS); 4298 } 4299 4300 /*@ 4301 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4302 4303 Collective 4304 4305 Input Parameters: 4306 + mat - the matrix 4307 - v - matrix values, stored by row 4308 4309 Level: intermediate 4310 4311 Notes: 4312 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4313 4314 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4315 4316 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4317 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4318 @*/ 4319 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4320 { 4321 PetscInt nnz, i, m; 4322 PetscBool nooffprocentries; 4323 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4324 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4325 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4326 PetscScalar *ad, *ao; 4327 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4328 PetscInt ldi, Iii, md; 4329 PetscInt *ld = Aij->ld; 4330 4331 PetscFunctionBegin; 4332 m = mat->rmap->n; 4333 4334 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4335 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4336 Iii = 0; 4337 for (i = 0; i < m; i++) { 4338 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4339 ldi = ld[i]; 4340 md = Adi[i + 1] - Adi[i]; 4341 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4342 ad += md; 4343 if (ao) { 4344 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4345 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4346 ao += nnz - md; 4347 } 4348 Iii += nnz; 4349 } 4350 nooffprocentries = mat->nooffprocentries; 4351 mat->nooffprocentries = PETSC_TRUE; 4352 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4353 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4354 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4355 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4356 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4357 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4358 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4359 mat->nooffprocentries = nooffprocentries; 4360 PetscFunctionReturn(PETSC_SUCCESS); 4361 } 4362 4363 /*@ 4364 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4365 (the default parallel PETSc format). For good matrix assembly performance 4366 the user should preallocate the matrix storage by setting the parameters 4367 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4368 4369 Collective 4370 4371 Input Parameters: 4372 + comm - MPI communicator 4373 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4374 This value should be the same as the local size used in creating the 4375 y vector for the matrix-vector product y = Ax. 4376 . n - This value should be the same as the local size used in creating the 4377 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4378 calculated if N is given) For square matrices n is almost always m. 4379 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4380 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4381 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4382 (same value is used for all local rows) 4383 . d_nnz - array containing the number of nonzeros in the various rows of the 4384 DIAGONAL portion of the local submatrix (possibly different for each row) 4385 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4386 The size of this array is equal to the number of local rows, i.e 'm'. 4387 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4388 submatrix (same value is used for all local rows). 
4389 - o_nnz - array containing the number of nonzeros in the various rows of the 4390 OFF-DIAGONAL portion of the local submatrix (possibly different for 4391 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4392 structure. The size of this array is equal to the number 4393 of local rows, i.e 'm'. 4394 4395 Output Parameter: 4396 . A - the matrix 4397 4398 Options Database Keys: 4399 + -mat_no_inode - Do not use inodes 4400 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4401 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4402 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4403 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4404 4405 Level: intermediate 4406 4407 Notes: 4408 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4409 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4410 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4411 4412 If the *_nnz parameter is given then the *_nz parameter is ignored 4413 4414 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4415 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4416 storage requirements for this matrix. 4417 4418 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4419 processor than it must be used on all processors that share the object for 4420 that argument. 4421 4422 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4423 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4424 4425 The user MUST specify either the local or global matrix dimensions 4426 (possibly both). 4427 4428 The parallel matrix is partitioned across processors such that the 4429 first `m0` rows belong to process 0, the next `m1` rows belong to 4430 process 1, the next `m2` rows belong to process 2, etc., where 4431 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4432 values corresponding to [m x N] submatrix. 4433 4434 The columns are logically partitioned with the n0 columns belonging 4435 to 0th partition, the next n1 columns belonging to the next 4436 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4437 4438 The DIAGONAL portion of the local submatrix on any given processor 4439 is the submatrix corresponding to the rows and columns m,n 4440 corresponding to the given processor. i.e diagonal matrix on 4441 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4442 etc. The remaining portion of the local submatrix [m x (N-n)] 4443 constitute the OFF-DIAGONAL portion. The example below better 4444 illustrates this concept. 4445 4446 For a square global matrix we define each processor's diagonal portion 4447 to be its local rows and the corresponding columns (a square submatrix); 4448 each processor's off-diagonal portion encompasses the remainder of the 4449 local matrix (a rectangular submatrix). 4450 4451 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4452 4453 When calling this routine with a single process communicator, a matrix of 4454 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4455 type of communicator, use the construction mechanism 4456 .vb 4457 MatCreate(..., &A); 4458 MatSetType(A, MATMPIAIJ); 4459 MatSetSizes(A, m, n, M, N); 4460 MatMPIAIJSetPreallocation(A, ...); 4461 .ve 4462 4463 By default, this format uses inodes (identical nodes) when possible. 
4464 We search for consecutive rows with the same nonzero structure, thereby 4465 reusing matrix information to achieve increased efficiency. 4466 4467 Example Usage: 4468 Consider the following 8x8 matrix with 34 non-zero values, that is 4469 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4470 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4471 as follows 4472 4473 .vb 4474 1 2 0 | 0 3 0 | 0 4 4475 Proc0 0 5 6 | 7 0 0 | 8 0 4476 9 0 10 | 11 0 0 | 12 0 4477 ------------------------------------- 4478 13 0 14 | 15 16 17 | 0 0 4479 Proc1 0 18 0 | 19 20 21 | 0 0 4480 0 0 0 | 22 23 0 | 24 0 4481 ------------------------------------- 4482 Proc2 25 26 27 | 0 0 28 | 29 0 4483 30 0 0 | 31 32 33 | 0 34 4484 .ve 4485 4486 This can be represented as a collection of submatrices as 4487 4488 .vb 4489 A B C 4490 D E F 4491 G H I 4492 .ve 4493 4494 Where the submatrices A,B,C are owned by proc0, D,E,F are 4495 owned by proc1, G,H,I are owned by proc2. 4496 4497 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4498 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4499 The 'M','N' parameters are 8,8, and have the same values on all procs. 4500 4501 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4502 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4503 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4504 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4505 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4506 matrix, and [DF] as another SeqAIJ matrix. 4507 4508 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4509 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4510 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 
4511 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4512 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4513 In this case, the values of `d_nz`,`o_nz` are 4514 .vb 4515 proc0 dnz = 2, o_nz = 2 4516 proc1 dnz = 3, o_nz = 2 4517 proc2 dnz = 1, o_nz = 4 4518 .ve 4519 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4520 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4521 for proc3. i.e we are using 12+15+10=37 storage locations to store 4522 34 values. 4523 4524 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4525 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4526 In the above case the values for d_nnz,o_nnz are 4527 .vb 4528 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4529 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4530 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4531 .ve 4532 Here the space allocated is sum of all the above values i.e 34, and 4533 hence pre-allocation is perfect. 
4534 4535 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4536 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4537 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4538 @*/ 4539 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4540 { 4541 PetscMPIInt size; 4542 4543 PetscFunctionBegin; 4544 PetscCall(MatCreate(comm, A)); 4545 PetscCall(MatSetSizes(*A, m, n, M, N)); 4546 PetscCallMPI(MPI_Comm_size(comm, &size)); 4547 if (size > 1) { 4548 PetscCall(MatSetType(*A, MATMPIAIJ)); 4549 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4550 } else { 4551 PetscCall(MatSetType(*A, MATSEQAIJ)); 4552 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4553 } 4554 PetscFunctionReturn(PETSC_SUCCESS); 4555 } 4556 4557 /*MC 4558 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4559 4560 Synopsis: 4561 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4562 4563 Not Collective 4564 4565 Input Parameter: 4566 . A - the `MATMPIAIJ` matrix 4567 4568 Output Parameters: 4569 + Ad - the diagonal portion of the matrix 4570 . Ao - the off-diagonal portion of the matrix 4571 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4572 - ierr - error code 4573 4574 Level: advanced 4575 4576 Note: 4577 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4578 4579 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4580 M*/ 4581 4582 /*MC 4583 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4584 4585 Synopsis: 4586 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4587 4588 Not Collective 4589 4590 Input Parameters: 4591 + A - the `MATMPIAIJ` matrix 4592 . Ad - the diagonal portion of the matrix 4593 . Ao - the off-diagonal portion of the matrix 4594 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4595 - ierr - error code 4596 4597 Level: advanced 4598 4599 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4600 M*/ 4601 4602 /*@C 4603 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4604 4605 Not Collective 4606 4607 Input Parameter: 4608 . A - The `MATMPIAIJ` matrix 4609 4610 Output Parameters: 4611 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4612 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4613 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4614 4615 Level: intermediate 4616 4617 Note: 4618 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4619 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4620 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4621 local column numbers to global column numbers in the original matrix. 4622 4623 Fortran Notes: 4624 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4625 4626 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4627 @*/ 4628 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4629 { 4630 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4631 PetscBool flg; 4632 4633 PetscFunctionBegin; 4634 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4635 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4636 if (Ad) *Ad = a->A; 4637 if (Ao) *Ao = a->B; 4638 if (colmap) *colmap = a->garray; 4639 PetscFunctionReturn(PETSC_SUCCESS); 4640 } 4641 4642 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4643 { 4644 PetscInt m, N, i, rstart, nnz, Ii; 4645 PetscInt *indx; 4646 PetscScalar *values; 4647 MatType rootType; 4648 4649 PetscFunctionBegin; 4650 PetscCall(MatGetSize(inmat, &m, &N)); 4651 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4652 PetscInt *dnz, *onz, sum, bs, cbs; 4653 4654 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4655 /* Check sum(n) = N */ 4656 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4657 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4658 4659 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4660 rstart -= m; 4661 4662 MatPreallocateBegin(comm, m, n, dnz, onz); 4663 for (i = 0; i < m; i++) { 4664 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4665 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4666 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4667 } 4668 4669 PetscCall(MatCreate(comm, outmat)); 4670 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4671 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4672 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4673 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4674 PetscCall(MatSetType(*outmat, rootType)); 4675 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4676 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4677 MatPreallocateEnd(dnz, onz); 4678 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4679 } 4680 4681 /* numeric phase */ 4682 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4683 for (i = 0; i < m; i++) { 4684 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4685 Ii = i + rstart; 4686 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4687 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4688 } 4689 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4690 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4691 PetscFunctionReturn(PETSC_SUCCESS); 4692 } 4693 4694 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4695 { 4696 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4697 4698 PetscFunctionBegin; 4699 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4700 PetscCall(PetscFree(merge->id_r)); 4701 PetscCall(PetscFree(merge->len_s)); 4702 PetscCall(PetscFree(merge->len_r)); 4703 PetscCall(PetscFree(merge->bi)); 4704 PetscCall(PetscFree(merge->bj)); 4705 PetscCall(PetscFree(merge->buf_ri[0])); 4706 PetscCall(PetscFree(merge->buf_ri)); 4707 PetscCall(PetscFree(merge->buf_rj[0])); 4708 PetscCall(PetscFree(merge->buf_rj)); 4709 PetscCall(PetscFree(merge->coi)); 4710 PetscCall(PetscFree(merge->coj)); 4711 PetscCall(PetscFree(merge->owners_co)); 4712 
PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4713 PetscCall(PetscFree(merge)); 4714 PetscFunctionReturn(PETSC_SUCCESS); 4715 } 4716 4717 #include <../src/mat/utils/freespace.h> 4718 #include <petscbt.h> 4719 4720 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4721 { 4722 MPI_Comm comm; 4723 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4724 PetscMPIInt size, rank, taga, *len_s; 4725 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4726 PetscMPIInt proc, k; 4727 PetscInt **buf_ri, **buf_rj; 4728 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4729 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4730 MPI_Request *s_waits, *r_waits; 4731 MPI_Status *status; 4732 const MatScalar *aa, *a_a; 4733 MatScalar **abuf_r, *ba_i; 4734 Mat_Merge_SeqsToMPI *merge; 4735 PetscContainer container; 4736 4737 PetscFunctionBegin; 4738 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4739 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4740 4741 PetscCallMPI(MPI_Comm_size(comm, &size)); 4742 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4743 4744 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4745 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4746 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4747 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4748 aa = a_a; 4749 4750 bi = merge->bi; 4751 bj = merge->bj; 4752 buf_ri = merge->buf_ri; 4753 buf_rj = merge->buf_rj; 4754 4755 PetscCall(PetscMalloc1(size, &status)); 4756 owners = merge->rowmap->range; 4757 len_s = merge->len_s; 4758 4759 /* send and recv matrix values */ 4760 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4761 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4762 4763 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4764 for 
(proc = 0, k = 0; proc < size; proc++) { 4765 if (!len_s[proc]) continue; 4766 i = owners[proc]; 4767 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4768 k++; 4769 } 4770 4771 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4772 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4773 PetscCall(PetscFree(status)); 4774 4775 PetscCall(PetscFree(s_waits)); 4776 PetscCall(PetscFree(r_waits)); 4777 4778 /* insert mat values of mpimat */ 4779 PetscCall(PetscMalloc1(N, &ba_i)); 4780 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4781 4782 for (k = 0; k < merge->nrecv; k++) { 4783 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4784 nrows = *buf_ri_k[k]; 4785 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4786 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4787 } 4788 4789 /* set values of ba */ 4790 m = merge->rowmap->n; 4791 for (i = 0; i < m; i++) { 4792 arow = owners[rank] + i; 4793 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4794 bnzi = bi[i + 1] - bi[i]; 4795 PetscCall(PetscArrayzero(ba_i, bnzi)); 4796 4797 /* add local non-zero vals of this proc's seqmat into ba */ 4798 anzi = ai[arow + 1] - ai[arow]; 4799 aj = a->j + ai[arow]; 4800 aa = a_a + ai[arow]; 4801 nextaj = 0; 4802 for (j = 0; nextaj < anzi; j++) { 4803 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4804 ba_i[j] += aa[nextaj++]; 4805 } 4806 } 4807 4808 /* add received vals into ba */ 4809 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4810 /* i-th row */ 4811 if (i == *nextrow[k]) { 4812 anzi = *(nextai[k] + 1) - *nextai[k]; 4813 aj = buf_rj[k] + *nextai[k]; 4814 aa = abuf_r[k] + *nextai[k]; 4815 nextaj = 0; 4816 for (j = 0; nextaj < anzi; j++) { 4817 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4818 
ba_i[j] += aa[nextaj++]; 4819 } 4820 } 4821 nextrow[k]++; 4822 nextai[k]++; 4823 } 4824 } 4825 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4826 } 4827 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4828 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4829 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4830 4831 PetscCall(PetscFree(abuf_r[0])); 4832 PetscCall(PetscFree(abuf_r)); 4833 PetscCall(PetscFree(ba_i)); 4834 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4835 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4836 PetscFunctionReturn(PETSC_SUCCESS); 4837 } 4838 4839 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4840 { 4841 Mat B_mpi; 4842 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4843 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4844 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4845 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4846 PetscInt len, *dnz, *onz, bs, cbs; 4847 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4848 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4849 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4850 MPI_Status *status; 4851 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4852 PetscBT lnkbt; 4853 Mat_Merge_SeqsToMPI *merge; 4854 PetscContainer container; 4855 4856 PetscFunctionBegin; 4857 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4858 4859 /* make sure it is a PETSc comm */ 4860 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4861 PetscCallMPI(MPI_Comm_size(comm, &size)); 4862 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4863 4864 PetscCall(PetscNew(&merge)); 4865 PetscCall(PetscMalloc1(size, &status)); 4866 4867 /* determine row ownership */ 4868 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4869 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4870 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4871 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4872 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4873 PetscCall(PetscMalloc1(size, &len_si)); 4874 PetscCall(PetscMalloc1(size, &merge->len_s)); 4875 4876 m = merge->rowmap->n; 4877 owners = merge->rowmap->range; 4878 4879 /* determine the number of messages to send, their lengths */ 4880 len_s = merge->len_s; 4881 4882 len = 0; /* length of buf_si[] */ 4883 merge->nsend = 0; 4884 for (PetscMPIInt proc = 0; proc < size; proc++) { 4885 len_si[proc] = 0; 4886 if (proc == rank) { 4887 len_s[proc] = 0; 4888 } else { 4889 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4890 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4891 } 4892 if (len_s[proc]) { 4893 merge->nsend++; 4894 nrows = 0; 4895 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4896 if (ai[i + 1] > ai[i]) nrows++; 4897 } 4898 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4899 len += len_si[proc]; 4900 } 4901 } 4902 4903 /* determine the number and length of messages to receive for ij-structure */ 4904 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4905 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4906 4907 /* post the Irecv of j-structure */ 4908 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4909 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4910 4911 /* post the Isend of j-structure */ 4912 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4913 4914 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4915 if (!len_s[proc]) continue; 4916 i = owners[proc]; 4917 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4918 k++; 4919 } 4920 4921 /* receives 
and sends of j-structure are complete */ 4922 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4923 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4924 4925 /* send and recv i-structure */ 4926 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4927 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4928 4929 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4930 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4931 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4932 if (!len_s[proc]) continue; 4933 /* form outgoing message for i-structure: 4934 buf_si[0]: nrows to be sent 4935 [1:nrows]: row index (global) 4936 [nrows+1:2*nrows+1]: i-structure index 4937 */ 4938 nrows = len_si[proc] / 2 - 1; 4939 buf_si_i = buf_si + nrows + 1; 4940 buf_si[0] = nrows; 4941 buf_si_i[0] = 0; 4942 nrows = 0; 4943 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4944 anzi = ai[i + 1] - ai[i]; 4945 if (anzi) { 4946 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4947 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4948 nrows++; 4949 } 4950 } 4951 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4952 k++; 4953 buf_si += len_si[proc]; 4954 } 4955 4956 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4957 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4958 4959 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4960 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4961 4962 PetscCall(PetscFree(len_si)); 4963 PetscCall(PetscFree(len_ri)); 4964 PetscCall(PetscFree(rj_waits)); 4965 PetscCall(PetscFree2(si_waits, sj_waits)); 4966 PetscCall(PetscFree(ri_waits)); 4967 PetscCall(PetscFree(buf_s)); 4968 
PetscCall(PetscFree(status)); 4969 4970 /* compute a local seq matrix in each processor */ 4971 /* allocate bi array and free space for accumulating nonzero column info */ 4972 PetscCall(PetscMalloc1(m + 1, &bi)); 4973 bi[0] = 0; 4974 4975 /* create and initialize a linked list */ 4976 nlnk = N + 1; 4977 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4978 4979 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4980 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4981 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4982 4983 current_space = free_space; 4984 4985 /* determine symbolic info for each local row */ 4986 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4987 4988 for (k = 0; k < merge->nrecv; k++) { 4989 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4990 nrows = *buf_ri_k[k]; 4991 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4992 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4993 } 4994 4995 MatPreallocateBegin(comm, m, n, dnz, onz); 4996 len = 0; 4997 for (i = 0; i < m; i++) { 4998 bnzi = 0; 4999 /* add local non-zero cols of this proc's seqmat into lnk */ 5000 arow = owners[rank] + i; 5001 anzi = ai[arow + 1] - ai[arow]; 5002 aj = a->j + ai[arow]; 5003 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5004 bnzi += nlnk; 5005 /* add received col data into lnk */ 5006 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5007 if (i == *nextrow[k]) { /* i-th row */ 5008 anzi = *(nextai[k] + 1) - *nextai[k]; 5009 aj = buf_rj[k] + *nextai[k]; 5010 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5011 bnzi += nlnk; 5012 nextrow[k]++; 5013 nextai[k]++; 5014 } 5015 } 5016 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5017 5018 /* if free space is not available, make more free space */ 5019 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5020 /* copy data into free space, then initialize lnk */ 5021 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5022 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5023 5024 current_space->array += bnzi; 5025 current_space->local_used += bnzi; 5026 current_space->local_remaining -= bnzi; 5027 5028 bi[i + 1] = bi[i] + bnzi; 5029 } 5030 5031 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5032 5033 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5034 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5035 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5036 5037 /* create symbolic parallel matrix B_mpi */ 5038 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5039 PetscCall(MatCreate(comm, &B_mpi)); 5040 if (n == PETSC_DECIDE) { 5041 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5042 } else { 5043 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5044 } 5045 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5046 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5047 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5048 MatPreallocateEnd(dnz, onz); 5049 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5050 5051 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5052 B_mpi->assembled = PETSC_FALSE; 5053 merge->bi = bi; 5054 merge->bj = bj; 5055 merge->buf_ri = buf_ri; 5056 merge->buf_rj = buf_rj; 5057 merge->coi = NULL; 5058 merge->coj = NULL; 5059 merge->owners_co = NULL; 5060 5061 PetscCall(PetscCommDestroy(&comm)); 5062 5063 /* attach the supporting struct to B_mpi for reuse */ 5064 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5065 PetscCall(PetscContainerSetPointer(container, merge)); 5066 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5067 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5068 PetscCall(PetscContainerDestroy(&container)); 5069 *mpimat = B_mpi; 5070 5071 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5072 PetscFunctionReturn(PETSC_SUCCESS); 5073 } 5074 5075 /*@ 5076 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5077 matrices from each processor 5078 5079 Collective 5080 5081 Input Parameters: 5082 + comm - the communicators the parallel matrix will live on 5083 . seqmat - the input sequential matrices 5084 . m - number of local rows (or `PETSC_DECIDE`) 5085 . n - number of local columns (or `PETSC_DECIDE`) 5086 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5087 5088 Output Parameter: 5089 . mpimat - the parallel matrix generated 5090 5091 Level: advanced 5092 5093 Note: 5094 The dimensions of the sequential matrix in each processor MUST be the same. 5095 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5096 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 
5097 5098 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5099 @*/ 5100 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5101 { 5102 PetscMPIInt size; 5103 5104 PetscFunctionBegin; 5105 PetscCallMPI(MPI_Comm_size(comm, &size)); 5106 if (size == 1) { 5107 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5108 if (scall == MAT_INITIAL_MATRIX) { 5109 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5110 } else { 5111 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5112 } 5113 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5114 PetscFunctionReturn(PETSC_SUCCESS); 5115 } 5116 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5117 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5118 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5119 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5120 PetscFunctionReturn(PETSC_SUCCESS); 5121 } 5122 5123 /*@ 5124 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5125 5126 Not Collective 5127 5128 Input Parameter: 5129 . A - the matrix 5130 5131 Output Parameter: 5132 . A_loc - the local sequential matrix generated 5133 5134 Level: developer 5135 5136 Notes: 5137 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5138 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5139 `n` is the global column count obtained with `MatGetSize()` 5140 5141 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5142 5143 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5144 5145 Destroy the matrix with `MatDestroy()` 5146 5147 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5148 @*/ 5149 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5150 { 5151 PetscBool mpi; 5152 5153 PetscFunctionBegin; 5154 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5155 if (mpi) { 5156 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5157 } else { 5158 *A_loc = A; 5159 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5160 } 5161 PetscFunctionReturn(PETSC_SUCCESS); 5162 } 5163 5164 /*@ 5165 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5166 5167 Not Collective 5168 5169 Input Parameters: 5170 + A - the matrix 5171 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5172 5173 Output Parameter: 5174 . A_loc - the local sequential matrix generated 5175 5176 Level: developer 5177 5178 Notes: 5179 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5180 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5181 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5182 5183 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5184 5185 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5186 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5187 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5188 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* one rank: the diagonal block already IS the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walked forward through both branches; aav/bav keep the original pointers for the restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* merged row i holds all of A's diag entries plus all of B's off-diag entries, in global column order */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: columns strictly left of the diagonal block */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local indices by cstart to make them global */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining columns right of the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure is unchanged: only re-copy the values in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A (columns left of the diagonal block) */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by
taking all its local rows and putting them into a sequential matrix with 5299 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5300 5301 Not Collective 5302 5303 Input Parameters: 5304 + A - the matrix 5305 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5306 5307 Output Parameters: 5308 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5309 - A_loc - the local sequential matrix generated 5310 5311 Level: developer 5312 5313 Note: 5314 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5315 part, then those associated with the off-diagonal part (in its local ordering) 5316 5317 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5318 @*/ 5319 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5320 { 5321 Mat Ao, Ad; 5322 const PetscInt *cmap; 5323 PetscMPIInt size; 5324 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5325 5326 PetscFunctionBegin; 5327 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5328 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5329 if (size == 1) { 5330 if (scall == MAT_INITIAL_MATRIX) { 5331 PetscCall(PetscObjectReference((PetscObject)Ad)); 5332 *A_loc = Ad; 5333 } else if (scall == MAT_REUSE_MATRIX) { 5334 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5335 } 5336 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5337 PetscFunctionReturn(PETSC_SUCCESS); 5338 } 5339 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5340 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5341 if (f) { 5342 PetscCall((*f)(A, scall, glob, A_loc)); 5343 } else { 5344 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5345 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5346 Mat_SeqAIJ *c; 5347 PetscInt *ai = a->i, *aj = a->j; 5348 PetscInt *bi = b->i, *bj = b->j; 5349 PetscInt *ci, *cj; 5350 const PetscScalar *aa, *ba; 5351 PetscScalar *ca; 5352 PetscInt i, j, am, dn, on; 5353 5354 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5355 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5356 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5357 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5358 if (scall == MAT_INITIAL_MATRIX) { 5359 PetscInt k; 5360 PetscCall(PetscMalloc1(1 + am, &ci)); 5361 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5362 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5363 ci[0] = 0; 5364 for (i = 0, k = 0; i < am; i++) { 5365 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5366 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5367 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5368 /* diagonal portion of A */ 5369 for (j = 0; j < ncols_d; j++, k++) { 5370 cj[k] = *aj++; 5371 ca[k] = *aa++; 5372 } 5373 /* off-diagonal portion of A */ 5374 for (j = 0; j < ncols_o; j++, k++) { 5375 cj[k] = dn + *bj++; 5376 ca[k] = *ba++; 5377 } 5378 } 5379 /* put together the new matrix */ 5380 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5381 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5382 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5383 c = (Mat_SeqAIJ *)(*A_loc)->data; 5384 c->free_a = PETSC_TRUE; 5385 c->free_ij = PETSC_TRUE; 5386 c->nonew = 0; 5387 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5388 } else if (scall == MAT_REUSE_MATRIX) { 5389 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5390 for (i = 0; i < am; i++) { 5391 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5392 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5393 /* diagonal portion of A */ 5394 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5395 /* off-diagonal portion of A */ 5396 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5397 } 5398 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5399 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5400 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5401 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5402 if (glob) { 5403 PetscInt cst, *gidx; 5404 5405 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5406 PetscCall(PetscMalloc1(dn + on, &gidx)); 5407 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5408 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5409 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5410 } 5411 } 5412 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5413 PetscFunctionReturn(PETSC_SUCCESS); 5414 } 5415 5416 /*@C 5417 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5418 5419 Not Collective 5420 5421 Input Parameters: 5422 + A - the matrix 5423 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5424 . row - index set of rows to extract (or `NULL`) 5425 - col - index set of columns to extract (or `NULL`) 5426 5427 Output Parameter: 5428 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: owned columns plus the nonzero ghost columns (a->garray),
       merged in ascending global order (garray entries < start, then owned, then >= start) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* per owned row, record (diag nnz, off-diag nnz) pairs and their running offsets */
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs: one leaf per incoming nonzero entry (diag / off-diag separately) */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (shifted in place, undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place local->global conversion of po->j */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      /* dof > 1 collapses MAIJ component columns onto one key */
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A     - the first matrix in `MATMPIAIJ` format
. B     - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5751 - B_seq - the sequential matrix generated 5752 5753 Level: developer 5754 5755 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5756 @*/ 5757 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5758 { 5759 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5760 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5761 IS isrowb, iscolb; 5762 Mat *bseq = NULL; 5763 5764 PetscFunctionBegin; 5765 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5766 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5767 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5768 5769 if (scall == MAT_INITIAL_MATRIX) { 5770 start = A->cmap->rstart; 5771 cmap = a->garray; 5772 nzA = a->A->cmap->n; 5773 nzB = a->B->cmap->n; 5774 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5775 ncols = 0; 5776 for (i = 0; i < nzB; i++) { /* row < local row index */ 5777 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5778 else break; 5779 } 5780 imark = i; 5781 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5782 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5783 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5784 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5785 } else { 5786 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5787 isrowb = *rowb; 5788 iscolb = *colb; 5789 PetscCall(PetscMalloc1(1, &bseq)); 5790 bseq[0] = *B_seq; 5791 } 5792 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5793 *B_seq = bseq[0]; 5794 PetscCall(PetscFree(bseq)); 5795 if (!rowb) { 5796 
PetscCall(ISDestroy(&isrowb)); 5797 } else { 5798 *rowb = isrowb; 5799 } 5800 if (!colb) { 5801 PetscCall(ISDestroy(&iscolb)); 5802 } else { 5803 *colb = iscolb; 5804 } 5805 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5806 PetscFunctionReturn(PETSC_SUCCESS); 5807 } 5808 5809 /* 5810 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5811 of the OFF-DIAGONAL portion of local A 5812 5813 Collective 5814 5815 Input Parameters: 5816 + A,B - the matrices in `MATMPIAIJ` format 5817 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5818 5819 Output Parameter: 5820 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5821 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5822 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5823 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5824 5825 Developer Note: 5826 This directly accesses information inside the VecScatter associated with the matrix-vector product 5827 for this matrix. This is not desirable.. 
5828 5829 Level: developer 5830 5831 */ 5832 5833 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5834 { 5835 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5836 VecScatter ctx; 5837 MPI_Comm comm; 5838 const PetscMPIInt *rprocs, *sprocs; 5839 PetscMPIInt nrecvs, nsends; 5840 const PetscInt *srow, *rstarts, *sstarts; 5841 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5842 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5843 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5844 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5845 PetscMPIInt size, tag, rank, nreqs; 5846 5847 PetscFunctionBegin; 5848 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5849 PetscCallMPI(MPI_Comm_size(comm, &size)); 5850 5851 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5852 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5853 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5854 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5855 5856 if (size == 1) { 5857 startsj_s = NULL; 5858 bufa_ptr = NULL; 5859 *B_oth = NULL; 5860 PetscFunctionReturn(PETSC_SUCCESS); 5861 } 5862 5863 ctx = a->Mvctx; 5864 tag = ((PetscObject)ctx)->tag; 5865 5866 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5867 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5868 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5869 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5870 PetscCall(PetscMalloc1(nreqs, &reqs)); 5871 rwaits = reqs; 5872 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5873 5874 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5875 if (scall == MAT_INITIAL_MATRIX) { 5876 /* i-array */ 5877 /* post receives */ 5878 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5879 for (i = 0; i < nrecvs; i++) { 5880 rowlen = rvalues + rstarts[i] * rbs; 5881 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5882 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5883 } 5884 5885 /* pack the outgoing message */ 5886 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5887 5888 sstartsj[0] = 0; 5889 rstartsj[0] = 0; 5890 len = 0; /* total length of j or a array to be sent */ 5891 if (nsends) { 5892 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5893 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5894 } 5895 for (i = 0; i < nsends; i++) { 5896 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5897 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5898 for (j = 0; j < nrows; j++) { 5899 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5900 for (l = 0; l < sbs; l++) { 5901 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5902 5903 rowlen[j * sbs + l] = ncols; 5904 5905 len += ncols; 5906 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5907 } 5908 k++; 5909 } 5910 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5911 5912 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5913 } 5914 /* recvs and sends of i-array are completed */ 5915 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5916 
PetscCall(PetscFree(svalues)); 5917 5918 /* allocate buffers for sending j and a arrays */ 5919 PetscCall(PetscMalloc1(len + 1, &bufj)); 5920 PetscCall(PetscMalloc1(len + 1, &bufa)); 5921 5922 /* create i-array of B_oth */ 5923 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5924 5925 b_othi[0] = 0; 5926 len = 0; /* total length of j or a array to be received */ 5927 k = 0; 5928 for (i = 0; i < nrecvs; i++) { 5929 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5930 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5931 for (j = 0; j < nrows; j++) { 5932 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5933 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5934 k++; 5935 } 5936 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5937 } 5938 PetscCall(PetscFree(rvalues)); 5939 5940 /* allocate space for j and a arrays of B_oth */ 5941 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5942 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5943 5944 /* j-array */ 5945 /* post receives of j-array */ 5946 for (i = 0; i < nrecvs; i++) { 5947 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5948 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5949 } 5950 5951 /* pack the outgoing message j-array */ 5952 if (nsends) k = sstarts[0]; 5953 for (i = 0; i < nsends; i++) { 5954 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5955 bufJ = bufj + sstartsj[i]; 5956 for (j = 0; j < nrows; j++) { 5957 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5958 for (ll = 0; ll < sbs; ll++) { 5959 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5960 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5961 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5962 } 5963 } 5964 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5965 } 5966 5967 /* 
recvs and sends of j-array are completed */ 5968 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5969 } else if (scall == MAT_REUSE_MATRIX) { 5970 sstartsj = *startsj_s; 5971 rstartsj = *startsj_r; 5972 bufa = *bufa_ptr; 5973 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5974 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5975 5976 /* a-array */ 5977 /* post receives of a-array */ 5978 for (i = 0; i < nrecvs; i++) { 5979 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5980 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5981 } 5982 5983 /* pack the outgoing message a-array */ 5984 if (nsends) k = sstarts[0]; 5985 for (i = 0; i < nsends; i++) { 5986 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5987 bufA = bufa + sstartsj[i]; 5988 for (j = 0; j < nrows; j++) { 5989 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5990 for (ll = 0; ll < sbs; ll++) { 5991 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5992 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5993 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5994 } 5995 } 5996 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5997 } 5998 /* recvs and sends of a-array are completed */ 5999 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6000 PetscCall(PetscFree(reqs)); 6001 6002 if (scall == MAT_INITIAL_MATRIX) { 6003 Mat_SeqAIJ *b_oth; 6004 6005 /* put together the new matrix */ 6006 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6007 6008 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6009 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6010 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6011 b_oth->free_a = PETSC_TRUE; 6012 b_oth->free_ij = PETSC_TRUE; 6013 b_oth->nonew = 0; 6014 6015 PetscCall(PetscFree(bufj)); 6016 if (!startsj_s || !bufa_ptr) { 6017 PetscCall(PetscFree2(sstartsj, rstartsj)); 6018 PetscCall(PetscFree(bufa_ptr)); 6019 } else { 6020 *startsj_s = sstartsj; 6021 *startsj_r = rstartsj; 6022 *bufa_ptr = bufa; 6023 } 6024 } else if (scall == MAT_REUSE_MATRIX) { 6025 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6026 } 6027 6028 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6029 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6030 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6031 PetscFunctionReturn(PETSC_SUCCESS); 6032 } 6033 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6037 #if defined(PETSC_HAVE_MKL_SPARSE) 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6039 #endif 6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6042 #if defined(PETSC_HAVE_ELEMENTAL) 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_SCALAPACK) 6046 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_HYPRE) 6049 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 #if defined(PETSC_HAVE_CUDA) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
   Computes (B'*A')' since computing B*A directly is untenable

     n         p           p
   [    ]    [    ]      [    ]
 m [ A  ] * n[ B  ]  = m [ C  ]
   [    ]    [    ]      [    ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form explicit transposes, multiply in the reversed order, then transpose the
     result back into the caller-provided C (MAT_REUSE_MATRIX keeps C's layout) */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: size C as A*B and make sure it is some MPIDENSE flavor before setup */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type only if it is already a dense MPI type (host, CUDA or HIP backed) */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Hook up the symbolic/numeric kernels above for the C = A*B product case */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatcher: only the AB product type is supported for MPIDense * MPIAIJ */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.
6143 6144 This routine merges the two sets of nonzeros row by row and removes repeats. 6145 6146 Output Parameters: (memory is allocated by the caller) 6147 6148 i[],j[]: the CSR of the merged matrix, which has m rows. 6149 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6150 imap2[]: similar to imap1[], but for Set2. 6151 Note we order nonzeros row-by-row and from left to right. 6152 */ 6153 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6154 { 6155 PetscInt r, m; /* Row index of mat */ 6156 PetscCount t, t1, t2, b1, e1, b2, e2; 6157 6158 PetscFunctionBegin; 6159 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6160 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6161 i[0] = 0; 6162 for (r = 0; r < m; r++) { /* Do row by row merging */ 6163 b1 = rowBegin1[r]; 6164 e1 = rowEnd1[r]; 6165 b2 = rowBegin2[r]; 6166 e2 = rowEnd2[r]; 6167 while (b1 < e1 && b2 < e2) { 6168 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6169 j[t] = j1[b1]; 6170 imap1[t1] = t; 6171 imap2[t2] = t; 6172 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6173 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6174 t1++; 6175 t2++; 6176 t++; 6177 } else if (j1[b1] < j2[b2]) { 6178 j[t] = j1[b1]; 6179 imap1[t1] = t; 6180 b1 += jmap1[t1 + 1] - jmap1[t1]; 6181 t1++; 6182 t++; 6183 } else { 6184 j[t] = j2[b2]; 6185 imap2[t2] = t; 6186 b2 += jmap2[t2 + 1] - jmap2[t2]; 6187 t2++; 6188 t++; 6189 } 6190 } 6191 /* Merge the remaining in either j1[] or j2[] */ 6192 while (b1 < e1) { 6193 j[t] = j1[b1]; 6194 imap1[t1] = t; 6195 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6196 t1++; 6197 t++; 6198 } 6199 while (b2 < e2) { 6200 j[t] = j2[b2]; 6201 imap2[t2] = t; 6202 b2 += jmap2[t2 + 1] - jmap2[t2]; 6203 t2++; 6204 t++; 6205 } 6206 PetscCall(PetscIntCast(t, i + r + 1)); 6207 } 6208 PetscFunctionReturn(PETSC_SUCCESS); 6209 } 6210 6211 /* 6212 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6213 6214 Input Parameters: 6215 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6216 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6217 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6218 6219 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6220 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6221 6222 Output Parameters: 6223 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6224 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6225 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6226 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6227 6228 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6229 Atot: number of entries belonging to the diagonal block. 6230 Annz: number of unique nonzeros belonging to the diagonal block. 6231 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6232 repeats (i.e., same 'i,j' pair). 6233 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6234 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6235 6236 Atot: number of entries belonging to the diagonal block 6237 Annz: number of unique nonzeros belonging to the diagonal block. 6238 6239 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6240 6241 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6242 */ 6243 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6244 { 6245 PetscInt cstart, cend, rstart, rend, row, col; 6246 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6247 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6248 PetscCount k, m, p, q, r, s, mid; 6249 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6250 6251 PetscFunctionBegin; 6252 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6253 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6254 m = rend - rstart; 6255 6256 /* Skip negative rows */ 6257 for (k = 0; k < n; k++) 6258 if (i[k] >= 0) break; 6259 6260 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6261 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6262 */ 6263 while (k < n) { 6264 row = i[k]; 6265 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6266 for (s = k; s < n; s++) 6267 if (i[s] != row) break; 6268 6269 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6270 for (p = k; p < s; p++) { 6271 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6272 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6273 } 6274 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6275 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6276 rowBegin[row - rstart] = k; 6277 rowMid[row - rstart] = mid; 6278 rowEnd[row - rstart] = s; 6279 6280 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6281 Atot += mid - k; 6282 Btot += s - mid; 6283 6284 /* Count unique nonzeros of this diag row */ 6285 for (p = k; p < mid;) { 6286 col = j[p]; 6287 do { 6288 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6289 p++; 6290 } while (p < mid && j[p] == col); 6291 Annz++; 6292 } 6293 6294 /* Count unique nonzeros of this offdiag row */ 6295 for (p = mid; p < s;) { 6296 col = j[p]; 6297 do { 6298 p++; 6299 } while (p < s && j[p] == col); 6300 Bnnz++; 6301 } 6302 k = s; 6303 } 6304 6305 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6306 PetscCall(PetscMalloc1(Atot, &Aperm)); 6307 PetscCall(PetscMalloc1(Btot, &Bperm)); 6308 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6309 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6310 6311 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6312 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6313 for (r = 0; r < m; r++) { 6314 k = rowBegin[r]; 6315 mid = rowMid[r]; 6316 s = rowEnd[r]; 6317 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6318 
PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6319 Atot += mid - k; 6320 Btot += s - mid; 6321 6322 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6323 for (p = k; p < mid;) { 6324 col = j[p]; 6325 q = p; 6326 do { 6327 p++; 6328 } while (p < mid && j[p] == col); 6329 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6330 Annz++; 6331 } 6332 6333 for (p = mid; p < s;) { 6334 col = j[p]; 6335 q = p; 6336 do { 6337 p++; 6338 } while (p < s && j[p] == col); 6339 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6340 Bnnz++; 6341 } 6342 } 6343 /* Output */ 6344 *Aperm_ = Aperm; 6345 *Annz_ = Annz; 6346 *Atot_ = Atot; 6347 *Ajmap_ = Ajmap; 6348 *Bperm_ = Bperm; 6349 *Bnnz_ = Bnnz; 6350 *Btot_ = Btot; 6351 *Bjmap_ = Bjmap; 6352 PetscFunctionReturn(PETSC_SUCCESS); 6353 } 6354 6355 /* 6356 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6357 6358 Input Parameters: 6359 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6360 nnz: number of unique nonzeros in the merged matrix 6361 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6362 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6363 6364 Output Parameter: (memory is allocated by the caller) 6365 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6366 6367 Example: 6368 nnz1 = 4 6369 nnz = 6 6370 imap = [1,3,4,5] 6371 jmap = [0,3,5,6,7] 6372 then, 6373 jmap_new = [0,0,3,3,5,6,7] 6374 */ 6375 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6376 { 6377 PetscCount k, p; 6378 6379 PetscFunctionBegin; 6380 jmap_new[0] = 0; 6381 p = nnz; /* p loops over jmap_new[] backwards */ 6382 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6383 for (; p > 
imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Positions at or before the first mapped nonzero all get the base offset */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for a MatCOOStruct_MPIAIJ; releases the SF and all index/permutation
   arrays built during COO preallocation (used as a PetscContainer destroy callback) */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up COO assembly for an MPIAIJ matrix: analyze the (coo_i, coo_j) entry list,
   route off-process entries to their owners, and record the plan in a container */
PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Discard any previously built off-diagonal support data; it will be rebuilt at assembly */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
6437 PetscCall(PetscLayoutSetUp(mat->cmap)); 6438 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6439 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6440 PetscCall(MatGetLocalSize(mat, &m, &n)); 6441 PetscCall(MatGetSize(mat, &M, &N)); 6442 6443 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6444 /* entries come first, then local rows, then remote rows. */ 6445 PetscCount n1 = coo_n, *perm1; 6446 PetscInt *i1 = coo_i, *j1 = coo_j; 6447 6448 PetscCall(PetscMalloc1(n1, &perm1)); 6449 for (k = 0; k < n1; k++) perm1[k] = k; 6450 6451 /* Manipulate indices so that entries with negative row or col indices will have smallest 6452 row indices, local entries will have greater but negative row indices, and remote entries 6453 will have positive row indices. 6454 */ 6455 for (k = 0; k < n1; k++) { 6456 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6457 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6458 else { 6459 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6460 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6461 } 6462 } 6463 6464 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6465 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6466 6467 /* Advance k to the first entry we need to take care of */ 6468 for (k = 0; k < n1; k++) 6469 if (i1[k] > PETSC_INT_MIN) break; 6470 PetscCount i1start = k; 6471 6472 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6473 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6474 6475 /* Send remote 
rows to their owner */ 6476 /* Find which rows should be sent to which remote ranks*/ 6477 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6478 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6479 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6480 const PetscInt *ranges; 6481 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6482 6483 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6484 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6485 for (k = rem; k < n1;) { 6486 PetscMPIInt owner; 6487 PetscInt firstRow, lastRow; 6488 6489 /* Locate a row range */ 6490 firstRow = i1[k]; /* first row of this owner */ 6491 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6492 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6493 6494 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6495 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6496 6497 /* All entries in [k,p) belong to this remote owner */ 6498 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6499 PetscMPIInt *sendto2; 6500 PetscInt *nentries2; 6501 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6502 6503 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6504 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6505 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6506 PetscCall(PetscFree2(sendto, nentries2)); 6507 sendto = sendto2; 6508 nentries = nentries2; 6509 maxNsend = maxNsend2; 6510 } 6511 sendto[nsend] = owner; 6512 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6513 nsend++; 6514 k = p; 6515 } 6516 6517 /* Build 1st SF to know offsets on remote to send data */ 6518 PetscSF sf1; 6519 PetscInt nroots = 1, nroots2 = 0; 6520 PetscInt nleaves = nsend, nleaves2 = 0; 6521 PetscInt *offsets; 6522 PetscSFNode *iremote; 6523 6524 PetscCall(PetscSFCreate(comm, &sf1)); 6525 PetscCall(PetscMalloc1(nsend, &iremote)); 6526 PetscCall(PetscMalloc1(nsend, &offsets)); 6527 for (k = 0; k < nsend; k++) { 6528 iremote[k].rank = sendto[k]; 6529 iremote[k].index = 0; 6530 nleaves2 += nentries[k]; 6531 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6532 } 6533 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6534 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6535 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6536 PetscCall(PetscSFDestroy(&sf1)); 6537 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6538 6539 /* Build 2nd SF to send remote COOs to their owner */ 6540 PetscSF sf2; 6541 nroots = nroots2; 6542 nleaves = nleaves2; 6543 PetscCall(PetscSFCreate(comm, &sf2)); 6544 PetscCall(PetscSFSetFromOptions(sf2)); 6545 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6546 p = 0; 6547 for (k = 0; k < nsend; k++) { 6548 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6549 for (q = 0; q < nentries[k]; q++, p++) { 6550 iremote[p].rank = sendto[k]; 6551 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6552 } 6553 } 6554 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6555 6556 /* Send the remote COOs to their owner */ 6557 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6558 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6559 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6560 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6561 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6562 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6563 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6564 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6565 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6566 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6567 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6568 6569 PetscCall(PetscFree(offsets)); 6570 PetscCall(PetscFree2(sendto, nentries)); 6571 6572 /* Sort received COOs by row along with the permutation array */ 6573 for (k = 0; k < n2; k++) perm2[k] = k; 6574 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6575 6576 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6577 PetscCount *Cperm1; 6578 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6579 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6580 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6581 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6582 6583 /* Support for HYPRE matrices, kind of a hack. 6584 Swap min column with diagonal so that diagonal values will go first */ 6585 PetscBool hypre; 6586 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6587 if (hypre) { 6588 PetscInt *minj; 6589 PetscBT hasdiag; 6590 6591 PetscCall(PetscBTCreate(m, &hasdiag)); 6592 PetscCall(PetscMalloc1(m, &minj)); 6593 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6594 for (k = i1start; k < rem; k++) { 6595 if (j1[k] < cstart || j1[k] >= cend) continue; 6596 const PetscInt rindex = i1[k] - rstart; 6597 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6598 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6599 } 6600 for (k = 0; k < n2; k++) { 6601 if (j2[k] < cstart || j2[k] >= cend) continue; 6602 const PetscInt rindex = i2[k] - rstart; 6603 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6604 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6605 } 6606 for (k = i1start; k < rem; k++) { 6607 const PetscInt rindex = i1[k] - rstart; 6608 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6609 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6610 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6611 } 6612 for (k = 0; k < n2; k++) { 6613 const PetscInt rindex = i2[k] - rstart; 6614 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6615 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6616 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6617 } 6618 
PetscCall(PetscBTDestroy(&hasdiag)); 6619 PetscCall(PetscFree(minj)); 6620 } 6621 6622 /* Split local COOs and received COOs into diag/offdiag portions */ 6623 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6624 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6625 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6626 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6627 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6628 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6629 6630 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6631 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6632 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6633 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6634 6635 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6636 PetscInt *Ai, *Bi; 6637 PetscInt *Aj, *Bj; 6638 6639 PetscCall(PetscMalloc1(m + 1, &Ai)); 6640 PetscCall(PetscMalloc1(m + 1, &Bi)); 6641 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6642 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6643 6644 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6645 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6646 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6647 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6648 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6649 6650 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6651 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6652 6653 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6654 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6655 PetscInt Annz = Ai[m]; 6656 PetscInt Bnnz = Bi[m]; 6657 PetscCount *Ajmap1_new, *Bjmap1_new; 6658 6659 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6660 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6661 6662 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6663 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6664 6665 PetscCall(PetscFree(Aimap1)); 6666 PetscCall(PetscFree(Ajmap1)); 6667 PetscCall(PetscFree(Bimap1)); 6668 PetscCall(PetscFree(Bjmap1)); 6669 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6670 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6671 PetscCall(PetscFree(perm1)); 6672 PetscCall(PetscFree3(i2, j2, perm2)); 6673 6674 Ajmap1 = Ajmap1_new; 6675 Bjmap1 = Bjmap1_new; 6676 6677 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6678 if (Annz < Annz1 + Annz2) { 6679 PetscInt *Aj_new; 6680 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6681 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6682 PetscCall(PetscFree(Aj)); 6683 Aj = Aj_new; 6684 } 6685 6686 if (Bnnz < Bnnz1 + Bnnz2) { 6687 PetscInt *Bj_new; 6688 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6689 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6690 PetscCall(PetscFree(Bj)); 6691 Bj = Bj_new; 6692 } 6693 6694 /* Create new submatrices for on-process and off-process coupling */ 6695 PetscScalar *Aa, *Ba; 6696 MatType rtype; 6697 Mat_SeqAIJ *a, *b; 6698 PetscObjectState state; 6699 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6700 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6701 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6702 if (cstart) { 6703 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6704 } 6705 6706 PetscCall(MatGetRootType_Private(mat, &rtype)); 6707 6708 MatSeqXAIJGetOptions_Private(mpiaij->A); 6709 PetscCall(MatDestroy(&mpiaij->A)); 6710 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6711 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6712 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6713 6714 MatSeqXAIJGetOptions_Private(mpiaij->B); 6715 PetscCall(MatDestroy(&mpiaij->B)); 6716 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6717 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6718 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6719 6720 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6721 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6722 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6723 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6724 6725 a = (Mat_SeqAIJ *)mpiaij->A->data; 6726 b = (Mat_SeqAIJ *)mpiaij->B->data; 6727 a->free_a = PETSC_TRUE; 6728 a->free_ij = PETSC_TRUE; 6729 b->free_a = PETSC_TRUE; 6730 b->free_ij = PETSC_TRUE; 6731 a->maxnz = a->nz; 6732 b->maxnz = b->nz; 6733 6734 /* conversion must happen AFTER multiply setup */ 6735 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6736 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6737 PetscCall(VecDestroy(&mpiaij->lvec)); 6738 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6739 6740 // Put the COO struct in a container and then attach that to the matrix 6741 PetscCall(PetscMalloc1(1, &coo)); 6742 coo->n = coo_n; 6743 coo->sf = sf2; 6744 coo->sendlen = nleaves; 6745 coo->recvlen = nroots; 6746 coo->Annz = Annz; 6747 coo->Bnnz = Bnnz; 6748 coo->Annz2 = Annz2; 6749 coo->Bnnz2 = Bnnz2; 6750 coo->Atot1 = Atot1; 6751 coo->Atot2 = Atot2; 6752 coo->Btot1 = Btot1; 6753 coo->Btot2 = Btot2; 6754 coo->Ajmap1 = Ajmap1; 6755 coo->Aperm1 = Aperm1; 6756 coo->Bjmap1 = Bjmap1; 6757 coo->Bperm1 = Bperm1; 6758 coo->Aimap2 = Aimap2; 6759 coo->Ajmap2 = Ajmap2; 6760 coo->Aperm2 = Aperm2; 6761 coo->Bimap2 = Bimap2; 6762 
coo->Bjmap2 = Bjmap2; 6763 coo->Bperm2 = Bperm2; 6764 coo->Cperm1 = Cperm1; 6765 // Allocate in preallocation. If not used, it has zero cost on host 6766 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6767 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6768 PetscCall(PetscContainerSetPointer(container, coo)); 6769 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6770 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6771 PetscCall(PetscContainerDestroy(&container)); 6772 PetscFunctionReturn(PETSC_SUCCESS); 6773 } 6774 6775 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6776 { 6777 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6778 Mat A = mpiaij->A, B = mpiaij->B; 6779 PetscScalar *Aa, *Ba; 6780 PetscScalar *sendbuf, *recvbuf; 6781 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6782 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6783 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6784 const PetscCount *Cperm1; 6785 PetscContainer container; 6786 MatCOOStruct_MPIAIJ *coo; 6787 6788 PetscFunctionBegin; 6789 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6790 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6791 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6792 sendbuf = coo->sendbuf; 6793 recvbuf = coo->recvbuf; 6794 Ajmap1 = coo->Ajmap1; 6795 Ajmap2 = coo->Ajmap2; 6796 Aimap2 = coo->Aimap2; 6797 Bjmap1 = coo->Bjmap1; 6798 Bjmap2 = coo->Bjmap2; 6799 Bimap2 = coo->Bimap2; 6800 Aperm1 = coo->Aperm1; 6801 Aperm2 = coo->Aperm2; 6802 Bperm1 = coo->Bperm1; 6803 Bperm2 = coo->Bperm2; 6804 Cperm1 = coo->Cperm1; 6805 6806 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6807 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6808 6809 /* Pack 
entries to be sent to remote */ 6810 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6811 6812 /* Send remote entries to their owner and overlap the communication with local computation */ 6813 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6814 /* Add local entries to A and B */ 6815 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6816 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6817 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6818 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6819 } 6820 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6821 PetscScalar sum = 0.0; 6822 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6823 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6824 } 6825 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6826 6827 /* Add received remote entries to A and B */ 6828 for (PetscCount i = 0; i < coo->Annz2; i++) { 6829 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6830 } 6831 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6832 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6833 } 6834 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6835 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6836 PetscFunctionReturn(PETSC_SUCCESS); 6837 } 6838 6839 /*MC 6840 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6841 6842 Options Database Keys: 6843 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6844 6845 Level: beginner 6846 6847 Notes: 6848 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6849 in this case the values associated with the rows and columns one passes in are set to zero 6850 in the matrix 6851 6852 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6853 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6854 6855 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6856 M*/ 6857 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6858 { 6859 Mat_MPIAIJ *b; 6860 PetscMPIInt size; 6861 6862 PetscFunctionBegin; 6863 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6864 6865 PetscCall(PetscNew(&b)); 6866 B->data = (void *)b; 6867 B->ops[0] = MatOps_Values; 6868 B->assembled = PETSC_FALSE; 6869 B->insertmode = NOT_SET_VALUES; 6870 b->size = size; 6871 6872 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6873 6874 /* build cache for off array entries formed */ 6875 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6876 6877 b->donotstash = PETSC_FALSE; 6878 b->colmap = NULL; 6879 b->garray = NULL; 6880 b->roworiented = PETSC_TRUE; 6881 6882 /* stuff used for matrix vector multiply */ 6883 b->lvec = NULL; 6884 b->Mvctx = NULL; 6885 6886 /* stuff for MatGetRow() */ 6887 b->rowindices = NULL; 6888 b->rowvalues = NULL; 6889 b->getrowactive = PETSC_FALSE; 6890 6891 /* flexible pointer used in CUSPARSE classes */ 6892 b->spptr = NULL; 6893 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6896 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6904 #if defined(PETSC_HAVE_CUDA) 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6906 #endif 6907 #if defined(PETSC_HAVE_HIP) 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6909 #endif 6910 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6912 #endif 6913 #if defined(PETSC_HAVE_MKL_SPARSE) 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6915 #endif 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", 
MatConvert_MPIAIJ_MPISBAIJ)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6920 #if defined(PETSC_HAVE_ELEMENTAL) 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6922 #endif 6923 #if defined(PETSC_HAVE_SCALAPACK) 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6925 #endif 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6928 #if defined(PETSC_HAVE_HYPRE) 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6931 #endif 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6936 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6937 PetscFunctionReturn(PETSC_SUCCESS); 6938 } 6939 6940 /*@ 6941 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6942 and "off-diagonal" part of the matrix in CSR format. 6943 6944 Collective 6945 6946 Input Parameters: 6947 + comm - MPI communicator 6948 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6949 . 
n - This value should be the same as the local size used in creating the 6950 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6951 calculated if `N` is given) For square matrices `n` is almost always `m`. 6952 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6953 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6954 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6955 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6956 . a - matrix values 6957 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6958 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6959 - oa - matrix values 6960 6961 Output Parameter: 6962 . mat - the matrix 6963 6964 Level: advanced 6965 6966 Notes: 6967 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6968 must free the arrays once the matrix has been destroyed and not before. 6969 6970 The `i` and `j` indices are 0 based 6971 6972 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6973 6974 This sets local rows and cannot be used to set off-processor values. 6975 6976 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6977 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6978 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6979 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6980 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6981 communication if it is known that only local entries will be set. 6982 6983 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6984 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6985 @*/ 6986 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6987 { 6988 Mat_MPIAIJ *maij; 6989 6990 PetscFunctionBegin; 6991 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6992 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6993 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6994 PetscCall(MatCreate(comm, mat)); 6995 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6996 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6997 maij = (Mat_MPIAIJ *)(*mat)->data; 6998 6999 (*mat)->preallocated = PETSC_TRUE; 7000 7001 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7002 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7003 7004 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7005 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7006 7007 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7008 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7009 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7010 PetscCall(MatSetOption(*mat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7011 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7012 PetscFunctionReturn(PETSC_SUCCESS); 7013 } 7014 7015 typedef struct { 7016 Mat *mp; /* intermediate products */ 7017 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7018 PetscInt cp; /* number of intermediate products */ 7019 7020 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7021 PetscInt *startsj_s, *startsj_r; 7022 PetscScalar *bufa; 7023 Mat P_oth; 7024 7025 /* may take advantage of merging product->B */ 7026 Mat Bloc; /* B-local by merging diag and off-diag */ 7027 7028 /* cusparse does not have support to split between symbolic and numeric phases. 7029 When api_user is true, we don't need to update the numerical values 7030 of the temporary storage */ 7031 PetscBool reusesym; 7032 7033 /* support for COO values insertion */ 7034 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7035 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7036 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7037 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7038 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7039 PetscMemType mtype; 7040 7041 /* customization */ 7042 PetscBool abmerge; 7043 PetscBool P_oth_bind; 7044 } MatMatMPIAIJBACKEND; 7045 7046 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7047 { 7048 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7049 PetscInt i; 7050 7051 PetscFunctionBegin; 7052 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7053 PetscCall(PetscFree(mmdata->bufa)); 7054 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7055 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7056 PetscCall(MatDestroy(&mmdata->P_oth)); 7057 PetscCall(MatDestroy(&mmdata->Bloc)); 7058 PetscCall(PetscSFDestroy(&mmdata->sf)); 7059 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7060 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7061 PetscCall(PetscFree(mmdata->own[0])); 7062 PetscCall(PetscFree(mmdata->own)); 7063 PetscCall(PetscFree(mmdata->off[0])); 7064 PetscCall(PetscFree(mmdata->off)); 7065 PetscCall(PetscFree(mmdata)); 7066 PetscFunctionReturn(PETSC_SUCCESS); 7067 } 7068 7069 /* Copy selected n entries with indices in idx[] of A to v[]. 
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (possibly device-side) implementation if the Mat provides one */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* gather the n entries selected by idx[] into v[] */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* idx == NULL: copy the first n entries of the data array verbatim */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Numeric phase for the backend (device-friendly) MPIAIJ matrix products set up by
   MatProductSymbolic_MPIAIJBACKEND. Recomputes the intermediate sequential products,
   collects their values into the COO value buffers (local and off-process), scatters
   the off-process contributions via the PetscSF built in the symbolic phase, and
   finally inserts everything into C with MatSetValuesCOO().
*/
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first numeric call after symbolic may reuse; subsequent calls must refresh */

  /* run the numeric phase of every intermediate sequential product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* gather values of the intermediate products into the COO buffers:
     coo_v collects locally-owned entries (n_d running offset),
     coo_w collects entries destined for other processes (n_o running offset) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

    /* off[i+1]-off[i] = number of entries mp[i] sends to other processes (pointer difference into the shared index array) */
    PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
    if (mmdata->mptmp[i]) continue; /* purely temporary intermediate, values consumed by a later product */
    if (noff) {
      PetscInt nown;

      PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries: copy the whole nonzero array of mp[i] */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: values received from remote ranks land after the local ones in coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/*
   Symbolic phase for the backend (device-friendly) MPIAIJ matrix products.

   Decomposes the requested parallel product (AB, AtB, or PtAP) into at most
   MAX_NUMBER_INTERMEDIATE sequential products of the local diagonal/off-diagonal
   blocks (and communicated rows of P), records how each intermediate's rows and
   columns map to global indices of C, builds a PetscSF for entries that must be
   inserted on remote processes, and preallocates C in COO format. The matching
   numeric phase is MatProductNumeric_MPIAIJBACKEND.
*/
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
  /* type-0: consecutive, start from 0; type-1: consecutive with */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* symmetric A: treat A^t*B as A*B to avoid the more expensive AtB path */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine the sizes of C and whether any product values must be sent to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* a single rank never sends values away */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* Build the list of intermediate sequential products mp[0..cp-1].
     Each gets an options prefix "backend_p<idx>_" appended so it can be tuned individually. */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;       /* rows are consecutive with base offset (C's row start) */
      cmapt[cp] = 2;       /* columns need the merged local-to-global table */
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1; /* columns consecutive, offset by C's column start */
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* off-diagonal columns map through P's garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2; /* both rows and columns need the merged local-to-global table */
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2; /* rows of P_off map through garray, hence may be owned by other processes */
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* mp[1] = A_off * P_oth is a temporary, consumed by mp[2] below */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* temporary: its values feed the next product, not C directly */
      cp++;
      /* mp[2] = P_loc^t * (A_off * P_oth) */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
      Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* record segment ends so off[p+1]-off[p] / own[p+1]-own[p] give per-matrix counts */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* build the SF that routes the off-process (i,j,v) triples to their owners */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscInt incoo_o;
    PetscCall(PetscIntCast(ncoo_o, &incoo_o));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* empty SF so the numeric phase can call the gather unconditionally */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Decide whether the backend (device) product path can be used for this product; otherwise
   fall back to the standard MPIAIJ implementation. With device support, the backend path is
   chosen only when A and B have matching types, neither is bound to the CPU, and the user has
   not requested the CPU code through the *_backend_cpu options.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)

  Note: relies on the column indices returned by MatGetRow() being sorted ascending,
  so equal block indices are adjacent and the `<` test suffices to deduplicate.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j; /* cnt = -1 so an empty row yields *n = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; /* record each new block index once */
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed - the block indices (must be large enough to contain the indices)

  w0, w1, w2 are caller-provided workspaces; the result may alias any of them,
  so the caller must not free the workspaces before consuming *collapsed.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* merge the collapsed indices of the bs rows of the block row, one row at a time */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap buffers: the merged result becomes the "previous" set for the next row */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameter:
   . Amat - matrix
   - symmetrize - make the result symmetric
   + scale - scale with diagonal

   Output Parameter:
   .
a_Gmat - output scalar graph >= 0 7774 7775 */ 7776 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7777 { 7778 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7779 MPI_Comm comm; 7780 Mat Gmat; 7781 PetscBool ismpiaij, isseqaij; 7782 Mat a, b, c; 7783 MatType jtype; 7784 7785 PetscFunctionBegin; 7786 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7787 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7788 PetscCall(MatGetSize(Amat, &MM, &NN)); 7789 PetscCall(MatGetBlockSize(Amat, &bs)); 7790 nloc = (Iend - Istart) / bs; 7791 7792 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7793 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7794 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7795 7796 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7797 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7798 implementation */ 7799 if (bs > 1) { 7800 PetscCall(MatGetType(Amat, &jtype)); 7801 PetscCall(MatCreate(comm, &Gmat)); 7802 PetscCall(MatSetType(Gmat, jtype)); 7803 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7804 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7805 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7806 PetscInt *d_nnz, *o_nnz; 7807 MatScalar *aa, val, *AA; 7808 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7809 7810 if (isseqaij) { 7811 a = Amat; 7812 b = NULL; 7813 } else { 7814 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7815 a = d->A; 7816 b = d->B; 7817 } 7818 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7819 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7820 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7821 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7822 const PetscInt *cols1, *cols2; 7823 7824 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7825 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7826 nnz[brow / bs] = nc2 / bs; 7827 if (nc2 % bs) ok = 0; 7828 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7829 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7830 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7831 if (nc1 != nc2) ok = 0; 7832 else { 7833 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7834 if (cols1[jj] != cols2[jj]) ok = 0; 7835 if (cols1[jj] % bs != jj % bs) ok = 0; 7836 } 7837 } 7838 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7839 } 7840 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7841 if (!ok) { 7842 PetscCall(PetscFree2(d_nnz, o_nnz)); 7843 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7844 goto old_bs; 7845 } 7846 } 7847 } 7848 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7849 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7850 PetscCall(PetscFree2(d_nnz, o_nnz)); 7851 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7852 // diag 7853 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7854 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7855 7856 ai = aseq->i; 7857 n = ai[brow + 1] - ai[brow]; 7858 aj = aseq->j + ai[brow]; 7859 for (PetscInt k = 0; k < n; k += bs) { // block columns 7860 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7861 val = 0; 7862 if (index_size == 0) { 7863 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7864 aa = aseq->a + ai[brow + ii] + k; 7865 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7866 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7867 } 7868 } 7869 } else { // use (index,index) value if provided 
7870 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7871 PetscInt ii = index[iii]; 7872 aa = aseq->a + ai[brow + ii] + k; 7873 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7874 PetscInt jj = index[jjj]; 7875 val += PetscAbs(PetscRealPart(aa[jj])); 7876 } 7877 } 7878 } 7879 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7880 AA[k / bs] = val; 7881 } 7882 grow = Istart / bs + brow / bs; 7883 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7884 } 7885 // off-diag 7886 if (ismpiaij) { 7887 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7888 const PetscScalar *vals; 7889 const PetscInt *cols, *garray = aij->garray; 7890 7891 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7892 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7893 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7894 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7895 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7896 AA[k / bs] = 0; 7897 AJ[cidx] = garray[cols[k]] / bs; 7898 } 7899 nc = ncols / bs; 7900 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7901 if (index_size == 0) { 7902 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7903 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7904 for (PetscInt k = 0; k < ncols; k += bs) { 7905 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7906 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7907 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7908 } 7909 } 7910 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7911 } 7912 } else { // use (index,index) value if provided 7913 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7914 PetscInt ii = index[iii]; 7915 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7916 for (PetscInt k = 
0; k < ncols; k += bs) { 7917 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7918 PetscInt jj = index[jjj]; 7919 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7920 } 7921 } 7922 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7923 } 7924 } 7925 grow = Istart / bs + brow / bs; 7926 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7927 } 7928 } 7929 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7930 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7931 PetscCall(PetscFree2(AA, AJ)); 7932 } else { 7933 const PetscScalar *vals; 7934 const PetscInt *idx; 7935 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7936 old_bs: 7937 /* 7938 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7939 */ 7940 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7941 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7942 if (isseqaij) { 7943 PetscInt max_d_nnz; 7944 7945 /* 7946 Determine exact preallocation count for (sequential) scalar matrix 7947 */ 7948 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7949 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7950 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7951 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7952 PetscCall(PetscFree3(w0, w1, w2)); 7953 } else if (ismpiaij) { 7954 Mat Daij, Oaij; 7955 const PetscInt *garray; 7956 PetscInt max_d_nnz; 7957 7958 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7959 /* 7960 Determine exact preallocation count for diagonal block portion of scalar matrix 7961 */ 7962 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7963 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7964 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7965 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, 
&d_nnz[jj], NULL)); 7966 PetscCall(PetscFree3(w0, w1, w2)); 7967 /* 7968 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7969 */ 7970 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7971 o_nnz[jj] = 0; 7972 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7973 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7974 o_nnz[jj] += ncols; 7975 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7976 } 7977 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7978 } 7979 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7980 /* get scalar copy (norms) of matrix */ 7981 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7982 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7983 PetscCall(PetscFree2(d_nnz, o_nnz)); 7984 for (Ii = Istart; Ii < Iend; Ii++) { 7985 PetscInt dest_row = Ii / bs; 7986 7987 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7988 for (jj = 0; jj < ncols; jj++) { 7989 PetscInt dest_col = idx[jj] / bs; 7990 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7991 7992 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7993 } 7994 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7995 } 7996 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7997 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7998 } 7999 } else { 8000 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8001 else { 8002 Gmat = Amat; 8003 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8004 } 8005 if (isseqaij) { 8006 a = Gmat; 8007 b = NULL; 8008 } else { 8009 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8010 a = d->A; 8011 b = d->B; 8012 } 8013 if (filter >= 0 || scale) { 8014 /* take absolute value of each entry */ 8015 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8016 MatInfo info; 8017 PetscScalar *avals; 8018 8019 PetscCall(MatGetInfo(c, 
MAT_LOCAL, &info)); 8020 PetscCall(MatSeqAIJGetArray(c, &avals)); 8021 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8022 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8023 } 8024 } 8025 } 8026 if (symmetrize) { 8027 PetscBool isset, issym; 8028 8029 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8030 if (!isset || !issym) { 8031 Mat matTrans; 8032 8033 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8034 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8035 PetscCall(MatDestroy(&matTrans)); 8036 } 8037 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8038 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8039 if (scale) { 8040 /* scale c for all diagonal values = 1 or -1 */ 8041 Vec diag; 8042 8043 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8044 PetscCall(MatGetDiagonal(Gmat, diag)); 8045 PetscCall(VecReciprocal(diag)); 8046 PetscCall(VecSqrtAbs(diag)); 8047 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8048 PetscCall(VecDestroy(&diag)); 8049 } 8050 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8051 if (filter >= 0) { 8052 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8053 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8054 } 8055 *a_Gmat = Gmat; 8056 PetscFunctionReturn(PETSC_SUCCESS); 8057 } 8058 8059 /* 8060 Special version for direct calls from Fortran 8061 */ 8062 8063 /* Change these macros so can be used in void function */ 8064 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8065 #undef PetscCall 8066 #define PetscCall(...) 
\ 8067 do { \ 8068 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8069 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8070 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8071 return; \ 8072 } \ 8073 } while (0) 8074 8075 #undef SETERRQ 8076 #define SETERRQ(comm, ierr, ...) \ 8077 do { \ 8078 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8079 return; \ 8080 } while (0) 8081 8082 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8083 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8084 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8085 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8086 #else 8087 #endif 8088 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8089 { 8090 Mat mat = *mmat; 8091 PetscInt m = *mm, n = *mn; 8092 InsertMode addv = *maddv; 8093 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8094 PetscScalar value; 8095 8096 MatCheckPreallocated(mat, 1); 8097 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8098 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8099 { 8100 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8101 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8102 PetscBool roworiented = aij->roworiented; 8103 8104 /* Some Variables required in the macro */ 8105 Mat A = aij->A; 8106 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8107 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8108 MatScalar *aa; 8109 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 8110 Mat B = aij->B; 8111 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8112 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8113 MatScalar *ba; 8114 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8115 * cannot use "#if defined" inside a macro. */ 8116 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8117 8118 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8119 PetscInt nonew = a->nonew; 8120 MatScalar *ap1, *ap2; 8121 8122 PetscFunctionBegin; 8123 PetscCall(MatSeqAIJGetArray(A, &aa)); 8124 PetscCall(MatSeqAIJGetArray(B, &ba)); 8125 for (i = 0; i < m; i++) { 8126 if (im[i] < 0) continue; 8127 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8128 if (im[i] >= rstart && im[i] < rend) { 8129 row = im[i] - rstart; 8130 lastcol1 = -1; 8131 rp1 = aj + ai[row]; 8132 ap1 = aa + ai[row]; 8133 rmax1 = aimax[row]; 8134 nrow1 = ailen[row]; 8135 low1 = 0; 8136 high1 = nrow1; 8137 lastcol2 = -1; 8138 rp2 = bj + bi[row]; 8139 ap2 = ba + bi[row]; 8140 rmax2 = bimax[row]; 8141 nrow2 = bilen[row]; 8142 low2 = 0; 8143 high2 = nrow2; 8144 8145 for (j = 0; j < n; j++) { 8146 if (roworiented) value = v[i * n + j]; 8147 else value = v[i + j * m]; 8148 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8149 if (in[j] >= cstart && in[j] < cend) { 8150 col = in[j] - cstart; 8151 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8152 } else if (in[j] < 0) continue; 8153 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8154 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8155 } else { 8156 if 
(mat->was_assembled) { 8157 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8158 #if defined(PETSC_USE_CTABLE) 8159 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8160 col--; 8161 #else 8162 col = aij->colmap[in[j]] - 1; 8163 #endif 8164 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8165 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8166 col = in[j]; 8167 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8168 B = aij->B; 8169 b = (Mat_SeqAIJ *)B->data; 8170 bimax = b->imax; 8171 bi = b->i; 8172 bilen = b->ilen; 8173 bj = b->j; 8174 rp2 = bj + bi[row]; 8175 ap2 = ba + bi[row]; 8176 rmax2 = bimax[row]; 8177 nrow2 = bilen[row]; 8178 low2 = 0; 8179 high2 = nrow2; 8180 bm = aij->B->rmap->n; 8181 ba = b->a; 8182 inserted = PETSC_FALSE; 8183 } 8184 } else col = in[j]; 8185 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8186 } 8187 } 8188 } else if (!aij->donotstash) { 8189 if (roworiented) { 8190 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8191 } else { 8192 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8193 } 8194 } 8195 } 8196 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8197 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8198 } 8199 PetscFunctionReturnVoid(); 8200 } 8201 8202 /* Undefining these here since they were redefined from their original definition above! No 8203 * other PETSc functions should be defined past this point, as it is impossible to recover the 8204 * original definitions */ 8205 #undef PetscCall 8206 #undef SETERRQ 8207