#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
   Destroys an MPIAIJ matrix: releases the diagonal (A) and off-diagonal (B) sequential
   blocks, the cached diagonal vector, the global-to-local column map, the garray mapping,
   the scatter context used for matrix-vector products, and then detaches every method
   that was composed onto the object (conversion routines, preallocation hooks, etc.).
   The compose-to-NULL calls must happen even though the object is being destroyed so the
   function lists are properly dereferenced.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is a hash table when PETSC_USE_CTABLE is set, otherwise a dense PetscInt array */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also reset above — this second reset is redundant but harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
   Builds the merged sequential local matrix via MatMPIAIJGetLocalMat(), stashes it on A
   (composed under "MatGetRowIJ_MPIAIJ" so the matching Restore routine can retrieve it),
   and forwards the row-index query to the sequential matrix. The MatDestroy() here only
   drops this function's reference; the composed reference keeps B alive until Restore.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Counterpart of MatGetRowIJ_MPIAIJ(): looks up the sequential matrix composed on A,
   restores its row-index arrays, then clears the composed reference (destroying B).
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and this type also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. 
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
   Binds/unbinds the matrix to the CPU. Forwards the request to the diagonal (A) and
   off-diagonal (B) sequential blocks and to the work vectors used in matrix-vector
   products, so their binding state never diverges from the parent matrix.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Propagates row/column block sizes to the sequential blocks. The off-diagonal block B
   keeps a column block size of 1 because its columns are a scattered subset of the
   global columns and carry no block structure.
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Builds an index set of the locally owned rows that contain at least one (numerically)
   nonzero entry in either the diagonal or off-diagonal block. If no process finds an
   all-zero row (global sum of zero-row counts is 0), *keptrows is left NULL as a
   shortcut meaning "all rows kept". The first pass only counts so the exact allocation
   size is known for the second pass.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of locally empty (all-zero) rows */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* collective: total number of zero rows across the communicator */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: record global indices of the rows that are kept */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Sets the diagonal of Y from vector D. When Y is assembled and its row and column
   layouts are congruent, the diagonal lives entirely in the sequential block A and the
   fast sequential path applies; otherwise fall back to the generic implementation.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Returns an index set of locally owned rows whose diagonal entry is zero (or missing).
   The search is delegated to the sequential diagonal block; indices are shifted from
   local to global numbering before building the IS, which takes ownership of rows.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes a per-column reduction (1/2/inf norm, or sum/mean of real or imaginary
   parts) over the whole matrix. Each process accumulates its local contribution into a
   length-n work array indexed by GLOBAL column (A entries shift by cmap->rstart, B
   entries map through garray), then a single MPI_Allreduce (MAX for the inf-norm, SUM
   otherwise) combines the contributions; NORM_2 takes the square root and the MEAN
   variants divide by the global row count afterwards.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore pairs force any device-side values to be synced to the host arrays read below */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Builds an IS of locally owned rows that have an entry outside the block diagonal:
   the union of the off-block-diagonal rows of A and the nonzero rows of B, sorted with
   duplicates removed, shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both index lists, then sort and deduplicate in place */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array, but it is fast to access).
  Entries are stored shifted by +1 so that 0 means "column not present".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Inserts/adds one value into row 'row' of the diagonal block A. Relies on locals of
   the caller (rp1/ap1/nrow1/low1/high1/lastcol1/nonew/aa/ai/aj/aimax/ailen/am/a/A):
   binary-searches the sorted column array, updates in place when the column exists,
   otherwise (if allowed by 'nonew') reallocates via MatSeqXAIJReallocateAIJ and shifts
   the tail of the row up to make room.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
   Off-diagonal twin of MatSetValues_SeqAIJ_A_Private(): same insert/add logic against
   block B using the caller's rp2/ap2/nrow2/low2/high2/lastcol2/ba/bi/bj/bimax/bilen/bm
   locals. Note: unlike the A variant, zero values are dropped regardless of row == col
   since the true diagonal never lives in B.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
   Overwrites the values of one locally owned global row with the array v, which must be
   ordered as the row appears globally: B entries left of the diagonal block, then all of
   the A (diagonal block) entries, then B entries right of the diagonal block. Only the
   values are written; the nonzero pattern must already exist.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatSetValues() implementation for MPIAIJ. Locally owned rows are routed either into
   the diagonal block A (columns in [cstart,cend)) or the off-diagonal block B via the
   insertion macros above; off-process rows are queued in the stash for communication at
   assembly time. If the matrix was previously assembled and a new off-diagonal column
   appears, B is disassembled back to global column numbering (MatDisAssemble_MPIAIJ)
   and the macro working set is re-derived from the replacement B.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row working set used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B is still in global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write positions into aj/bj */
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column indices */
        dnz++;
      } else { /* off-diagonal entries keep global column indices (matrix not yet assembled) */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; /* local column numbering in the diagonal block */
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries keep global column indices at this stage */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetValues() implementation for MPIAIJ. Only locally owned rows may be queried.
   Columns in the local diagonal range are read from A; other columns are translated
   through colmap/garray to the local numbering of B, and entries not present in B's
   pattern are returned as 0.0.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* garray check guards against a stale (dense-array) colmap hit after disassembly */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Starts communication of the stashed off-process entries; a no-op if stashing was
   disabled or the user guaranteed no off-process entries were set.
*/
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Finishes assembly: drains the stash (inserting received off-process values row by
   row via MatSetValues_MPIAIJ), assembles the diagonal block, reaches a collective
   decision on whether any rank disassembled (and disassembles to match if so), sets up
   the scatter for matrix-vector products on first final assembly, assembles the
   off-diagonal block, and sums the blocks' nonzerostates into the global nonzerostate.
   Collective: the Allreduces here must be executed by every rank in the same order.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros all entries of both the diagonal and off-diagonal blocks; the pattern is kept. */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const
PetscInt rows[], PetscScalar diag, Vec x, Vec b) 845 { 846 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 847 PetscObjectState sA, sB; 848 PetscInt *lrows; 849 PetscInt r, len; 850 PetscBool cong, lch, gch; 851 852 PetscFunctionBegin; 853 /* get locally owned rows */ 854 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 855 PetscCall(MatHasCongruentLayouts(A, &cong)); 856 /* fix right hand side if needed */ 857 if (x && b) { 858 const PetscScalar *xx; 859 PetscScalar *bb; 860 861 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 862 PetscCall(VecGetArrayRead(x, &xx)); 863 PetscCall(VecGetArray(b, &bb)); 864 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 865 PetscCall(VecRestoreArrayRead(x, &xx)); 866 PetscCall(VecRestoreArray(b, &bb)); 867 } 868 869 sA = mat->A->nonzerostate; 870 sB = mat->B->nonzerostate; 871 872 if (diag != 0.0 && cong) { 873 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 874 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 875 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 876 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 877 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 878 PetscInt nnwA, nnwB; 879 PetscBool nnzA, nnzB; 880 881 nnwA = aijA->nonew; 882 nnwB = aijB->nonew; 883 nnzA = aijA->keepnonzeropattern; 884 nnzB = aijB->keepnonzeropattern; 885 if (!nnzA) { 886 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 887 aijA->nonew = 0; 888 } 889 if (!nnzB) { 890 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 891 aijB->nonew = 0; 892 } 893 /* Must zero here before the next loop */ 894 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 895 
PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 896 for (r = 0; r < len; ++r) { 897 const PetscInt row = lrows[r] + A->rmap->rstart; 898 if (row >= A->cmap->N) continue; 899 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 900 } 901 aijA->nonew = nnwA; 902 aijB->nonew = nnwB; 903 } else { 904 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 905 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 906 } 907 PetscCall(PetscFree(lrows)); 908 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 909 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 910 911 /* reduce nonzerostate */ 912 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 913 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 914 if (gch) A->nonzerostate++; 915 PetscFunctionReturn(PETSC_SUCCESS); 916 } 917 918 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 919 { 920 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 921 PetscMPIInt n = A->rmap->n; 922 PetscInt i, j, r, m, len = 0; 923 PetscInt *lrows, *owners = A->rmap->range; 924 PetscMPIInt p = 0; 925 PetscSFNode *rrows; 926 PetscSF sf; 927 const PetscScalar *xx; 928 PetscScalar *bb, *mask, *aij_a; 929 Vec xmask, lmask; 930 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 931 const PetscInt *aj, *ii, *ridx; 932 PetscScalar *aa; 933 934 PetscFunctionBegin; 935 /* Create SF where leaves are input rows and roots are owned rows */ 936 PetscCall(PetscMalloc1(n, &lrows)); 937 for (r = 0; r < n; ++r) lrows[r] = -1; 938 PetscCall(PetscMalloc1(N, &rrows)); 939 for (r = 0; r < N; ++r) { 940 const PetscInt idx = rows[r]; 941 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 942 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row 
too */ 943 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 944 } 945 rrows[r].rank = p; 946 rrows[r].index = rows[r] - owners[p]; 947 } 948 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 949 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 950 /* Collect flags for rows to be zeroed */ 951 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 952 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 953 PetscCall(PetscSFDestroy(&sf)); 954 /* Compress and put in row numbers */ 955 for (r = 0; r < n; ++r) 956 if (lrows[r] >= 0) lrows[len++] = r; 957 /* zero diagonal part of matrix */ 958 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 959 /* handle off-diagonal part of matrix */ 960 PetscCall(MatCreateVecs(A, &xmask, NULL)); 961 PetscCall(VecDuplicate(l->lvec, &lmask)); 962 PetscCall(VecGetArray(xmask, &bb)); 963 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 964 PetscCall(VecRestoreArray(xmask, &bb)); 965 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 966 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 967 PetscCall(VecDestroy(&xmask)); 968 if (x && b) { /* this code is buggy when the row and column layout don't match */ 969 PetscBool cong; 970 971 PetscCall(MatHasCongruentLayouts(A, &cong)); 972 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 973 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 974 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 975 PetscCall(VecGetArrayRead(l->lvec, &xx)); 976 PetscCall(VecGetArray(b, &bb)); 977 } 978 PetscCall(VecGetArray(lmask, &mask)); 979 /* remove zeroed rows of off-diagonal matrix */ 980 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 981 ii = aij->i; 982 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 983 /* loop over all elements of off process part of matrix zeroing removed columns*/ 984 if (aij->compressedrow.use) { 985 m = aij->compressedrow.nrows; 986 ii = aij->compressedrow.i; 987 ridx = aij->compressedrow.rindex; 988 for (i = 0; i < m; i++) { 989 n = ii[i + 1] - ii[i]; 990 aj = aij->j + ii[i]; 991 aa = aij_a + ii[i]; 992 993 for (j = 0; j < n; j++) { 994 if (PetscAbsScalar(mask[*aj])) { 995 if (b) bb[*ridx] -= *aa * xx[*aj]; 996 *aa = 0.0; 997 } 998 aa++; 999 aj++; 1000 } 1001 ridx++; 1002 } 1003 } else { /* do not use compressed row format */ 1004 m = l->B->rmap->n; 1005 for (i = 0; i < m; i++) { 1006 n = ii[i + 1] - ii[i]; 1007 aj = aij->j + ii[i]; 1008 aa = aij_a + ii[i]; 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[i] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 } 1018 } 1019 if (x && b) { 1020 PetscCall(VecRestoreArray(b, &bb)); 1021 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1022 } 1023 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1024 PetscCall(VecRestoreArray(lmask, &mask)); 1025 PetscCall(VecDestroy(&lmask)); 1026 PetscCall(PetscFree(lrows)); 1027 1028 /* only change matrix nonzero state if pattern was allowed to be changed */ 1029 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1030 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1031 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1032 } 1033 PetscFunctionReturn(PETSC_SUCCESS); 1034 } 1035 1036 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1037 { 1038 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1039 PetscInt nt; 1040 VecScatter Mvctx = a->Mvctx; 1041 1042 PetscFunctionBegin; 1043 PetscCall(VecGetLocalSize(xx, &nt)); 1044 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" 
PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1045 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1046 PetscUseTypeMethod(a->A, mult, xx, yy); 1047 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1048 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 1056 PetscFunctionBegin; 1057 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1058 PetscFunctionReturn(PETSC_SUCCESS); 1059 } 1060 1061 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1062 { 1063 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1064 VecScatter Mvctx = a->Mvctx; 1065 1066 PetscFunctionBegin; 1067 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1068 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1069 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1070 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1071 PetscFunctionReturn(PETSC_SUCCESS); 1072 } 1073 1074 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1075 { 1076 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1077 1078 PetscFunctionBegin; 1079 /* do nondiagonal part */ 1080 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1081 /* do local part */ 1082 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1083 /* add partial results together */ 1084 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1085 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1090 { 1091 MPI_Comm comm; 1092 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1093 Mat Adia = 
Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1094 IS Me, Notme; 1095 PetscInt M, N, first, last, *notme, i; 1096 PetscBool lf; 1097 PetscMPIInt size; 1098 1099 PetscFunctionBegin; 1100 /* Easy test: symmetric diagonal block */ 1101 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1102 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1103 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1104 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1105 PetscCallMPI(MPI_Comm_size(comm, &size)); 1106 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1107 1108 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1109 PetscCall(MatGetSize(Amat, &M, &N)); 1110 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1111 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1112 for (i = 0; i < first; i++) notme[i] = i; 1113 for (i = last; i < M; i++) notme[i - last + first] = i; 1114 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1115 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1116 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1117 Aoff = Aoffs[0]; 1118 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1119 Boff = Boffs[0]; 1120 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1121 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1122 PetscCall(MatDestroyMatrices(1, &Boffs)); 1123 PetscCall(ISDestroy(&Me)); 1124 PetscCall(ISDestroy(&Notme)); 1125 PetscCall(PetscFree(notme)); 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1130 { 1131 PetscFunctionBegin; 1132 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1133 PetscFunctionReturn(PETSC_SUCCESS); 1134 } 1135 1136 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1137 { 1138 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 
1139 1140 PetscFunctionBegin; 1141 /* do nondiagonal part */ 1142 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1143 /* do local part */ 1144 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1145 /* add partial results together */ 1146 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1147 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 /* 1152 This only works correctly for square matrices where the subblock A->A is the 1153 diagonal block 1154 */ 1155 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1161 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1162 PetscCall(MatGetDiagonal(a->A, v)); 1163 PetscFunctionReturn(PETSC_SUCCESS); 1164 } 1165 1166 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1167 { 1168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1169 1170 PetscFunctionBegin; 1171 PetscCall(MatScale(a->A, aa)); 1172 PetscCall(MatScale(a->B, aa)); 1173 PetscFunctionReturn(PETSC_SUCCESS); 1174 } 1175 1176 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1177 { 1178 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1179 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1180 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1181 const PetscInt *garray = aij->garray; 1182 const PetscScalar *aa, *ba; 1183 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1184 PetscInt64 nz, hnz; 1185 PetscInt *rowlens; 1186 PetscInt *colidxs; 1187 PetscScalar *matvals; 1188 PetscMPIInt rank; 1189 1190 PetscFunctionBegin; 1191 PetscCall(PetscViewerSetUp(viewer)); 1192 1193 M = 
mat->rmap->N; 1194 N = mat->cmap->N; 1195 m = mat->rmap->n; 1196 rs = mat->rmap->rstart; 1197 cs = mat->cmap->rstart; 1198 nz = A->nz + B->nz; 1199 1200 /* write matrix header */ 1201 header[0] = MAT_FILE_CLASSID; 1202 header[1] = M; 1203 header[2] = N; 1204 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1205 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1206 if (rank == 0) { 1207 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1208 else header[3] = (PetscInt)hnz; 1209 } 1210 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1211 1212 /* fill in and store row lengths */ 1213 PetscCall(PetscMalloc1(m, &rowlens)); 1214 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1215 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1216 PetscCall(PetscFree(rowlens)); 1217 1218 /* fill in and store column indices */ 1219 PetscCall(PetscMalloc1(nz, &colidxs)); 1220 for (cnt = 0, i = 0; i < m; i++) { 1221 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1222 if (garray[B->j[jb]] > cs) break; 1223 colidxs[cnt++] = garray[B->j[jb]]; 1224 } 1225 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1226 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1227 } 1228 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1229 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1230 PetscCall(PetscFree(colidxs)); 1231 1232 /* fill in and store nonzero values */ 1233 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1234 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1235 PetscCall(PetscMalloc1(nz, &matvals)); 1236 for (cnt = 0, i = 0; i < m; i++) { 1237 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1238 if (garray[B->j[jb]] > cs) break; 1239 matvals[cnt++] = ba[jb]; 1240 } 
1241 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1242 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1243 } 1244 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1245 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1246 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1247 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1248 PetscCall(PetscFree(matvals)); 1249 1250 /* write block size option to the viewer's .info file */ 1251 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 #include <petscdraw.h> 1256 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1259 PetscMPIInt rank = aij->rank, size = aij->size; 1260 PetscBool isdraw, iascii, isbinary; 1261 PetscViewer sviewer; 1262 PetscViewerFormat format; 1263 1264 PetscFunctionBegin; 1265 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1266 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1267 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1268 if (iascii) { 1269 PetscCall(PetscViewerGetFormat(viewer, &format)); 1270 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1271 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1272 PetscCall(PetscMalloc1(size, &nz)); 1273 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1274 for (i = 0; i < (PetscInt)size; i++) { 1275 nmax = PetscMax(nmax, nz[i]); 1276 nmin = PetscMin(nmin, nz[i]); 1277 navg += nz[i]; 1278 } 1279 PetscCall(PetscFree(nz)); 1280 navg = navg / size; 1281 PetscCall(PetscViewerASCIIPrintf(viewer, 
"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 PetscCall(PetscViewerGetFormat(viewer, &format)); 1285 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1286 MatInfo info; 1287 PetscInt *inodes = NULL; 1288 1289 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1290 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1291 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1292 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1293 if (!inodes) { 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1295 (double)info.memory)); 1296 } else { 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1298 (double)info.memory)); 1299 } 1300 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1301 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1302 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1303 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1304 PetscCall(PetscViewerFlush(viewer)); 1305 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1306 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1307 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1310 PetscInt inodecount, inodelimit, *inodes; 1311 
PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1312 if (inodes) { 1313 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1314 } else { 1315 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } 1321 } else if (isbinary) { 1322 if (size == 1) { 1323 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1324 PetscCall(MatView(aij->A, viewer)); 1325 } else { 1326 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1327 } 1328 PetscFunctionReturn(PETSC_SUCCESS); 1329 } else if (iascii && size == 1) { 1330 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1331 PetscCall(MatView(aij->A, viewer)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } else if (isdraw) { 1334 PetscDraw draw; 1335 PetscBool isnull; 1336 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1337 PetscCall(PetscDrawIsNull(draw, &isnull)); 1338 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1339 } 1340 1341 { /* assemble the entire matrix onto first processor */ 1342 Mat A = NULL, Av; 1343 IS isrow, iscol; 1344 1345 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1346 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1347 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1348 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1349 /* The commented code uses MatCreateSubMatrices instead */ 1350 /* 1351 Mat *AA, A = NULL, Av; 1352 IS isrow,iscol; 1353 1354 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1355 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1356 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1357 if (rank == 0) { 1358 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1359 A = AA[0]; 1360 Av = AA[0]; 1361 } 1362 PetscCall(MatDestroySubMatrices(1,&AA)); 1363 */ 1364 PetscCall(ISDestroy(&iscol)); 1365 PetscCall(ISDestroy(&isrow)); 1366 /* 1367 Everyone has to call to draw the matrix since the graphics waits are 1368 synchronized across all processors that share the PetscDraw object 1369 */ 1370 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1371 if (rank == 0) { 1372 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1373 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1374 } 1375 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1376 PetscCall(MatDestroy(&A)); 1377 } 1378 PetscFunctionReturn(PETSC_SUCCESS); 1379 } 1380 1381 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1382 { 1383 PetscBool iascii, isdraw, issocket, isbinary; 1384 1385 PetscFunctionBegin; 1386 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1387 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1388 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1389 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1390 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1391 PetscFunctionReturn(PETSC_SUCCESS); 1392 } 1393 1394 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1395 { 1396 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1397 Vec bb1 = NULL; 1398 PetscBool hasop; 1399 1400 
PetscFunctionBegin; 1401 if (flag == SOR_APPLY_UPPER) { 1402 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1403 PetscFunctionReturn(PETSC_SUCCESS); 1404 } 1405 1406 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1407 1408 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1409 if (flag & SOR_ZERO_INITIAL_GUESS) { 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1411 its--; 1412 } 1413 1414 while (its--) { 1415 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1416 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1417 1418 /* update rhs: bb1 = bb - B*x */ 1419 PetscCall(VecScale(mat->lvec, -1.0)); 1420 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1421 1422 /* local sweep */ 1423 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1424 } 1425 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1426 if (flag & SOR_ZERO_INITIAL_GUESS) { 1427 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1428 its--; 1429 } 1430 while (its--) { 1431 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1432 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1433 1434 /* update rhs: bb1 = bb - B*x */ 1435 PetscCall(VecScale(mat->lvec, -1.0)); 1436 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1437 1438 /* local sweep */ 1439 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1440 } 1441 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1444 its--; 1445 } 1446 while (its--) { 1447 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, 
SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    /* gather the off-process part of xx into lvec for the off-diagonal multiply below */
    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily create and cache the diagonal; reused on subsequent calls */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - Builds B = P*A*Q for permutations given by the (parallel) index sets
  rowp and colp.

  Strategy: each process uses PetscSF reductions to invert the row and column permutations
  (finding where its rows/columns are sent), counts diagonal/off-diagonal nonzeros of the
  permuted matrix for preallocation, then inserts values with MatSetValues and assembles.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is sized for both the row loop (m) and the column loop (n) below */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal (dnnz) and off-diagonal (onnz) nonzeros per permuted row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetGhosts_MPIAIJ - Returns the number of ghost (off-process) columns and, optionally,
  a borrowed pointer to their global indices (aij->garray). The caller must not free it.
*/
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetInfo_MPIAIJ - Sums the statistics of the diagonal (A) and off-diagonal (B) blocks,
  then combines them across ranks according to flag (MAT_LOCAL / MAT_GLOBAL_MAX / MAT_GLOBAL_SUM).
*/
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  /* info is overwritten by B's local stats; the A contributions were saved in isend[] */
  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] +=
info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetOption_MPIAIJ - Dispatches a MatOption: most options are forwarded to both the
  diagonal (a->A) and off-diagonal (a->B) sequential blocks; a few are recorded locally
  or deliberately ignored.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* these options only make sense once the two blocks exist */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix by merging the
  rows of the diagonal block (local column numbering shifted by cstart) and the
  off-diagonal block (columns mapped to global numbers through garray), keeping the
  merged result sorted by increasing global column.

  The merged values/indices are stored in per-matrix scratch buffers (rowvalues /
  rowindices), so only one row may be active at a time; MatRestoreRow_MPIAIJ clears the flag.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
      allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries whose global column precedes the diagonal block */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* v was not requested, so imark has not been computed yet */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ (only clears
  the "row active" flag; the scratch buffers are kept for reuse).
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or infinity- (max row sum)
  norm of the parallel matrix; the 2-norm is not supported. Local contributions from the
  diagonal and off-diagonal blocks are combined with an MPI reduction.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single rank: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates per-(global)column absolute sums across both blocks */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place), preallocation counts for the result are computed
  locally and shipped to the owning ranks through a PetscSF. The diagonal block is
  transposed locally and quickly (all writes are local); the off-diagonal block is
  inserted with MatSetValues, one (transposed) column at a time, and assembled.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* the transpose swaps row and column layouts (and block sizes) */
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed local column indices to global indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* a row of A's off-diagonal block becomes a column of B */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr). The right-scaling
  of the off-diagonal block needs the off-process entries of rr, so the scatter of rr
  into lvec is started first and completed only after the local scalings, overlapping
  communication with computation.
*/
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/ 2043 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2044 /* because of the column compression in the off-processor part of the matrix a->B, 2045 the number of columns in a->B and b->B may be different, hence we cannot call 2046 the MatCopy() directly on the two parts. If need be, we can provide a more 2047 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2048 then copying the submatrices */ 2049 PetscCall(MatCopy_Basic(A, B, str)); 2050 } else { 2051 PetscCall(MatCopy(a->A, b->A, str)); 2052 PetscCall(MatCopy(a->B, b->B, str)); 2053 } 2054 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2055 PetscFunctionReturn(PETSC_SUCCESS); 2056 } 2057 2058 /* 2059 Computes the number of nonzeros per row needed for preallocation when X and Y 2060 have different nonzero structure. 2061 */ 2062 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2063 { 2064 PetscInt i, j, k, nzx, nzy; 2065 2066 PetscFunctionBegin; 2067 /* Set the number of nonzeros in the new matrix */ 2068 for (i = 0; i < m; i++) { 2069 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2070 nzx = xi[i + 1] - xi[i]; 2071 nzy = yi[i + 1] - yi[i]; 2072 nnz[i] = 0; 2073 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2074 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2075 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2076 nnz[i]++; 2077 } 2078 for (; k < nzy; k++) nnz[i]++; 2079 } 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2084 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2085 { 
2086 PetscInt m = Y->rmap->N; 2087 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2088 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2089 2090 PetscFunctionBegin; 2091 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2092 PetscFunctionReturn(PETSC_SUCCESS); 2093 } 2094 2095 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2096 { 2097 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2098 2099 PetscFunctionBegin; 2100 if (str == SAME_NONZERO_PATTERN) { 2101 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2102 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2103 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2104 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2105 } else { 2106 Mat B; 2107 PetscInt *nnz_d, *nnz_o; 2108 2109 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2110 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2111 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2112 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2113 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2114 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2115 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2116 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2117 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2118 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2119 PetscCall(MatHeaderMerge(Y, &B)); 2120 PetscCall(PetscFree(nnz_d)); 2121 PetscCall(PetscFree(nnz_o)); 2122 } 2123 PetscFunctionReturn(PETSC_SUCCESS); 2124 } 2125 2126 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2127 2128 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2129 { 2130 PetscFunctionBegin; 2131 if (PetscDefined(USE_COMPLEX)) { 2132 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2133 2134 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2135 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2136 } 2137 
PetscFunctionReturn(PETSC_SUCCESS); 2138 } 2139 2140 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2143 2144 PetscFunctionBegin; 2145 PetscCall(MatRealPart(a->A)); 2146 PetscCall(MatRealPart(a->B)); 2147 PetscFunctionReturn(PETSC_SUCCESS); 2148 } 2149 2150 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatImaginaryPart(a->A)); 2156 PetscCall(MatImaginaryPart(a->B)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2163 PetscInt i, *idxb = NULL, m = A->rmap->n; 2164 PetscScalar *va, *vv; 2165 Vec vB, vA; 2166 const PetscScalar *vb; 2167 2168 PetscFunctionBegin; 2169 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2170 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2171 2172 PetscCall(VecGetArrayWrite(vA, &va)); 2173 if (idx) { 2174 for (i = 0; i < m; i++) { 2175 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2176 } 2177 } 2178 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2180 PetscCall(PetscMalloc1(m, &idxb)); 2181 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2182 2183 PetscCall(VecGetArrayWrite(v, &vv)); 2184 PetscCall(VecGetArrayRead(vB, &vb)); 2185 for (i = 0; i < m; i++) { 2186 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2187 vv[i] = vb[i]; 2188 if (idx) idx[i] = a->garray[idxb[i]]; 2189 } else { 2190 vv[i] = va[i]; 2191 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2192 } 2193 } 2194 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2195 PetscCall(VecRestoreArrayWrite(vA, &va)); 2196 PetscCall(VecRestoreArrayRead(vB, &vb)); 2197 PetscCall(PetscFree(idxb)); 2198 PetscCall(VecDestroy(&vA)); 2199 PetscCall(VecDestroy(&vB)); 2200 PetscFunctionReturn(PETSC_SUCCESS); 2201 } 2202 
/*
  MatGetRowMinAbs_MPIAIJ - For each local row, v[i] receives the entry of smallest absolute
  value and idx[i] (optional) its global column. Because the off-diagonal block stores only
  explicit nonzeros in compressed columns, an implicit 0.0 in a row can be the minimum;
  the "hole"-search below finds the global column of the first implicit zero.
*/
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; write the result straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: every row's minimum-in-absolute-value is an implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of the row for a smaller absolute value */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal and off-diagonal candidates; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMin_MPIAIJ - For each local row, v[i] receives the minimum entry (comparison by
  real part) and idx[i] (optional) its global column. Structure mirrors
  MatGetRowMinAbs_MPIAIJ, including the implicit-zero hole search for the compressed
  off-diagonal block.
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: rows consist of implicit zeros only, so report PETSC_MAX_REAL as "empty" min */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of the row for a smaller value (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal and off-diagonal candidates; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMax_MPIAIJ - For each local row, v[i] receives the maximum entry (comparison by
  real part) and idx[i] (optional) its global column. Structure mirrors
  MatGetRowMin_MPIAIJ with the comparison direction reversed.
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: rows consist of implicit zeros only, so report PETSC_MIN_REAL as "empty" max */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of the row for a larger value (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal and off-diagonal candidates; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat =
*dummy;
  PetscCall(PetscFree(dummy)); /* takes ownership of the single Mat; frees only the pointer array */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the block-diagonal of the local diagonal (SeqAIJ) block; propagate any factorization error */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill both local blocks with random values; for an unassembled matrix the off-diagonal
   block must skip the owned column range so entries land in the correct block */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Method backing MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the increaseoverlap implementation */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* local nonzeros = stored entries of the diagonal block + the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* no-op for types that do not compose the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process -mat_increase_overlap_scalable from the options database */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

static
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  /* Y += a*I; ensure the diagonal block has room for one entry per row before shifting */
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the nonew flag across the re-preallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Diagonal entries live entirely in the local diagonal block, so delegate and shift the index */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert variable-sized diagonal blocks of the local diagonal (SeqAIJ) block */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop explicitly stored zeros from both local blocks */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPIAIJ; slot numbers in comments index struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};

/* Stash a copy of the values of both local blocks (pairs with MatRetrieveValues_MPIAIJ) */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously stashed by MatStoreValues_MPIAIJ */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate the two local SeqAIJ blocks (diagonal A and off-diagonal B),
   discarding any previous communication structures; on one rank B is 0 columns wide */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leaving hash-based MatSetValues mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset preallocation of both local blocks, discarding the communication structures
   (colmap/garray/lvec/Mvctx), which will be rebuilt at the next assembly */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate a MATMPIAIJ: copy layouts and bookkeeping, then duplicate the two local
   blocks and (when present) the off-process scatter machinery */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-call MatGetRow scratch is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a MATMPIAIJ from a viewer; dispatches on the viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a MATMPIAIJ from a PETSc binary viewer: header, row lengths, column indices, values */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3]; /* negative nz marks a special on-disk format that cannot be loaded here */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; /* prefix-sum: row lengths -> CSR row offsets */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree that iscol matches the column ownership exactly */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local)); /* not scalable: gathers the whole column IS on every rank */
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

 Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols; /* exclusive prefix sum: global offset of this rank's iscol entries */
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* -1 padding marks columns not in iscol */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership passes to the caller (freed there) */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* keep only the iscol_o entries whose global column survived in the condensed Bsub
         (both garrays are sorted, so a single merge pass suffices) */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d)); /* compose took a reference; drop ours */

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateSubMatrix_MPIAIJ - front-end for MatCreateSubMatrix() on MATMPIAIJ.

  Dispatches to one of three extraction kernels, from cheapest to most general:
    1. MatCreateSubMatrix_MPIAIJ_SameRowColDist() when both isrow and iscol match
       mat's row/column ownership ranges on every process,
    2. MatCreateSubMatrix_MPIAIJ_SameRowDist() when only isrow matches (and the
       gathered iscol is sorted),
    3. MatCreateSubMatrix_MPIAIJ_nonscalable() otherwise (gathers iscol globally).

  Collective; the distribution checks are combined with a single MPI_LAND
  all-reduce. On MAT_REUSE_MATRIX the path taken previously is recovered from
  the IS objects composed on *newmat ("isrow_d", "SubIScol", "ISAllGather").
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* recover which path was used when the submatrix was first created */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE; /* empty local selection trivially fits the local range */
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all processes must agree on the chosen path (collective dispatch) */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted: fall through to the general (nonscalable) path below,
           reusing the iscol_local already gathered here */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the result so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE; /* A and B below supply the storage; skip normal preallocation */

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat (ownership transfers; caller must not use A afterwards) */
  maij->A = A;

  /* map B's local column indices to global indices, in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; it shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* hand array ownership from B to Bnew before destroying B so the shared arrays survive */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *);

/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts a parallel submatrix when isrow
  matches mat's row distribution, so only the column selection needs mapping.

  Requires iscol_local (the sequential gather of iscol) to be sorted; it may
  contain duplicate indices. On MAT_INITIAL_MATRIX the intermediate objects
  ("SubMatrix", "SubIScol", "Subcmap") are composed on *newmat for reuse;
  on MAT_REUSE_MATRIX they are recovered from there (iscol_local may be NULL).
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the cached objects composed during the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local)); /* shared below; balanced by ISDestroy at save time */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0; /* merge cursor into sorted garray; relies on is_idx being sorted */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); /* prefix sum gives this rank's column range end */
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation; olens aliases the second half */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate Msub columns to newmat global columns */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub)); /* compose holds a reference */

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),
"ISAllGather", (PetscObject)iscol_local)); 3733 PetscCall(ISDestroy(&iscol_local)); 3734 } 3735 } 3736 PetscFunctionReturn(PETSC_SUCCESS); 3737 } 3738 3739 /* 3740 Not great since it makes two copies of the submatrix, first an SeqAIJ 3741 in local and then by concatenating the local matrices the end result. 3742 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3743 3744 This requires a sequential iscol with all indices. 3745 */ 3746 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3747 { 3748 PetscMPIInt rank, size; 3749 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3750 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3751 Mat M, Mreuse; 3752 MatScalar *aa, *vwork; 3753 MPI_Comm comm; 3754 Mat_SeqAIJ *aij; 3755 PetscBool colflag, allcolumns = PETSC_FALSE; 3756 3757 PetscFunctionBegin; 3758 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3759 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3760 PetscCallMPI(MPI_Comm_size(comm, &size)); 3761 3762 /* Check for special case: each processor gets entire matrix columns */ 3763 PetscCall(ISIdentity(iscol, &colflag)); 3764 PetscCall(ISGetLocalSize(iscol, &n)); 3765 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3766 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3767 3768 if (call == MAT_REUSE_MATRIX) { 3769 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3770 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3771 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3772 } else { 3773 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3774 } 3775 3776 /* 3777 m - number of 
local rows 3778 n - number of columns (same on all processors) 3779 rstart - first row in new global matrix generated 3780 */ 3781 PetscCall(MatGetSize(Mreuse, &m, &n)); 3782 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3783 if (call == MAT_INITIAL_MATRIX) { 3784 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3785 ii = aij->i; 3786 jj = aij->j; 3787 3788 /* 3789 Determine the number of non-zeros in the diagonal and off-diagonal 3790 portions of the matrix in order to do correct preallocation 3791 */ 3792 3793 /* first get start and end of "diagonal" columns */ 3794 if (csize == PETSC_DECIDE) { 3795 PetscCall(ISGetSize(isrow, &mglobal)); 3796 if (mglobal == n) { /* square matrix */ 3797 nlocal = m; 3798 } else { 3799 nlocal = n / size + ((n % size) > rank); 3800 } 3801 } else { 3802 nlocal = csize; 3803 } 3804 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3805 rstart = rend - nlocal; 3806 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3807 3808 /* next, compute all the lengths */ 3809 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3810 olens = dlens + m; 3811 for (i = 0; i < m; i++) { 3812 jend = ii[i + 1] - ii[i]; 3813 olen = 0; 3814 dlen = 0; 3815 for (j = 0; j < jend; j++) { 3816 if (*jj < rstart || *jj >= rend) olen++; 3817 else dlen++; 3818 jj++; 3819 } 3820 olens[i] = olen; 3821 dlens[i] = dlen; 3822 } 3823 PetscCall(MatCreate(comm, &M)); 3824 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3825 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3826 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3827 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3828 PetscCall(PetscFree(dlens)); 3829 } else { 3830 PetscInt ml, nl; 3831 3832 M = *newmat; 3833 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3834 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3835 PetscCall(MatZeroEntries(M)); 3836 /* 3837 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3838 rather than the slower MatSetValues(). 3839 */ 3840 M->was_assembled = PETSC_TRUE; 3841 M->assembled = PETSC_FALSE; 3842 } 3843 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3844 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3845 ii = aij->i; 3846 jj = aij->j; 3847 3848 /* trigger copy to CPU if needed */ 3849 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3850 for (i = 0; i < m; i++) { 3851 row = rstart + i; 3852 nz = ii[i + 1] - ii[i]; 3853 cwork = jj; 3854 jj = PetscSafePointerPlusOffset(jj, nz); 3855 vwork = aa; 3856 aa = PetscSafePointerPlusOffset(aa, nz); 3857 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3858 } 3859 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3860 3861 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3862 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3863 *newmat = M; 3864 3865 /* save submatrix used in processor for next request */ 3866 if (call == MAT_INITIAL_MATRIX) { 3867 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3868 PetscCall(MatDestroy(&Mreuse)); 3869 } 3870 PetscFunctionReturn(PETSC_SUCCESS); 3871 } 3872 3873 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3874 { 3875 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3876 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3877 const PetscInt *JJ; 3878 PetscBool nooffprocentries; 3879 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3880 3881 PetscFunctionBegin; 3882 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3883 3884 PetscCall(PetscLayoutSetUp(B->rmap)); 3885 PetscCall(PetscLayoutSetUp(B->cmap)); 3886 m = B->rmap->n; 3887 cstart = B->cmap->rstart; 3888 cend = B->cmap->rend; 3889 
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* debug-only validation of the CSR structure: nonnegative row lengths, column indices in range */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count diagonal-block vs off-diagonal-block entries per row for preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart; /* global row number */
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
  }
  /* all entries are local by construction; suppress off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* NOTE(review): assumes columns within each row are sorted so entries below
       the diagonal block come first — matches the sortedness requirement documented
       for MatUpdateMPIAIJWithArrays() */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

  Process0 [P0] rows_owned=[0,1]
    i =  {0,1,3}  [size = nrow+1  = 2+1]
    j =  {0,0,2}  [size = 3]
    v =  {1,2,3}  [size = 3]

  Process1 [P1] rows_owned=[2]
    i =  {0,3}    [size = nrow+1  = 1+1]
    j =  {0,1,2}  [size = 3]
    v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation; a no-op for types that do not provide one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B     - the matrix
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
4017 For matrices that will be factored, you must leave room for (and set) 4018 the diagonal entry even if it is zero. 4019 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4020 submatrix (same value is used for all local rows). 4021 - o_nnz - array containing the number of nonzeros in the various rows of the 4022 OFF-DIAGONAL portion of the local submatrix (possibly different for 4023 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4024 structure. The size of this array is equal to the number 4025 of local rows, i.e 'm'. 4026 4027 Example Usage: 4028 Consider the following 8x8 matrix with 34 non-zero values, that is 4029 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4030 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4031 as follows 4032 4033 .vb 4034 1 2 0 | 0 3 0 | 0 4 4035 Proc0 0 5 6 | 7 0 0 | 8 0 4036 9 0 10 | 11 0 0 | 12 0 4037 ------------------------------------- 4038 13 0 14 | 15 16 17 | 0 0 4039 Proc1 0 18 0 | 19 20 21 | 0 0 4040 0 0 0 | 22 23 0 | 24 0 4041 ------------------------------------- 4042 Proc2 25 26 27 | 0 0 28 | 29 0 4043 30 0 0 | 31 32 33 | 0 34 4044 .ve 4045 4046 This can be represented as a collection of submatrices as 4047 .vb 4048 A B C 4049 D E F 4050 G H I 4051 .ve 4052 4053 Where the submatrices A,B,C are owned by proc0, D,E,F are 4054 owned by proc1, G,H,I are owned by proc2. 4055 4056 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4057 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4058 The 'M','N' parameters are 8,8, and have the same values on all procs. 4059 4060 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4061 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4062 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4063 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4064 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4065 matrix, ans [DF] as another `MATSEQAIJ` matrix. 4066 4067 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4068 allocated for every row of the local diagonal submatrix, and `o_nz` 4069 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4070 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4071 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4072 In this case, the values of `d_nz`, `o_nz` are 4073 .vb 4074 proc0 dnz = 2, o_nz = 2 4075 proc1 dnz = 3, o_nz = 2 4076 proc2 dnz = 1, o_nz = 4 4077 .ve 4078 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4079 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4080 for proc3. i.e we are using 12+15+10=37 storage locations to store 4081 34 values. 4082 4083 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4084 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4085 In the above case the values for `d_nnz`, `o_nnz` are 4086 .vb 4087 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4088 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4089 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4090 .ve 4091 Here the space allocated is sum of all the above values i.e 34, and 4092 hence pre-allocation is perfect. 4093 4094 Level: intermediate 4095 4096 Notes: 4097 If the *_nnz parameter is given then the *_nz parameter is ignored 4098 4099 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4100 storage. The stored row and column indices begin with zero. 4101 See [Sparse Matrices](sec_matsparse) for details. 4102 4103 The parallel matrix is partitioned such that the first m0 rows belong to 4104 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4105 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4106 4107 The DIAGONAL portion of the local submatrix of a processor can be defined 4108 as the submatrix which is obtained by extracting the part corresponding to 4109 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4110 first row that belongs to the processor, r2 is the last row belonging to 4111 this processor, and c1-c2 is the range of indices of the local part of a 4112 vector suitable for applying the matrix to. This is an mxn matrix. In the 4113 common case of a square matrix, the row and column ranges are the same and 4114 the DIAGONAL part is also square. The remaining portion of the local 4115 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4116 4117 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4118 4119 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4120 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4121 You can also run with the option `-info` and look for messages with the string 4122 malloc in them to see if additional memory allocation was needed. 4123 4124 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4125 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4126 @*/ 4127 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4128 { 4129 PetscFunctionBegin; 4130 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4131 PetscValidType(B, 1); 4132 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4133 PetscFunctionReturn(PETSC_SUCCESS); 4134 } 4135 4136 /*@ 4137 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the matrix entries in standard 4138 CSR format for the local rows.
4139 4140 Collective 4141 4142 Input Parameters: 4143 + comm - MPI communicator 4144 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4145 . n - This value should be the same as the local size used in creating the 4146 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4147 calculated if N is given) For square matrices n is almost always m. 4148 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4149 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4150 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4151 . j - column indices 4152 - a - optional matrix values 4153 4154 Output Parameter: 4155 . mat - the matrix 4156 4157 Level: intermediate 4158 4159 Notes: 4160 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4161 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4162 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4163 4164 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4165 4166 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()` 4167 4168 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 4169 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4170 4171 The format which is used for the sparse matrix input, is equivalent to a 4172 row-major ordering.. 
i.e., for the following matrix, the input data expected is 4173 as shown 4174 .vb 4175 1 0 0 4176 2 0 3 P0 4177 ------- 4178 4 5 6 P1 4179 4180 Process0 [P0] rows_owned=[0,1] 4181 i = {0,1,3} [size = nrow+1 = 2+1] 4182 j = {0,0,2} [size = 3] 4183 v = {1,2,3} [size = 3] 4184 4185 Process1 [P1] rows_owned=[2] 4186 i = {0,3} [size = nrow+1 = 1+1] 4187 j = {0,1,2} [size = 3] 4188 v = {4,5,6} [size = 3] 4189 .ve 4190 4191 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4192 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4193 @*/ 4194 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4195 { 4196 PetscFunctionBegin; 4197 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4198 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4199 PetscCall(MatCreate(comm, mat)); 4200 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4201 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4202 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4203 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4204 PetscFunctionReturn(PETSC_SUCCESS); 4205 } 4206 4207 /*@ 4208 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the matrix entries in standard 4209 CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical to what was passed 4210 from `MatCreateMPIAIJWithArrays()` 4211 4212 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4213 4214 Collective 4215 4216 Input Parameters: 4217 + mat - the matrix 4218 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4219 .
n - This value should be the same as the local size used in creating the 4220 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4221 calculated if N is given) For square matrices n is almost always m. 4222 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4223 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4224 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4225 . J - column indices 4226 - v - matrix values 4227 4228 Level: deprecated 4229 4230 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4231 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4232 @*/ 4233 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4234 { 4235 PetscInt nnz, i; 4236 PetscBool nooffprocentries; 4237 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4238 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4239 PetscScalar *ad, *ao; 4240 PetscInt ldi, Iii, md; 4241 const PetscInt *Adi = Ad->i; 4242 PetscInt *ld = Aij->ld; 4243 4244 PetscFunctionBegin; 4245 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4246 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4247 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4248 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4249 4250 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, 
&ad)); 4251 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4252 4253 for (i = 0; i < m; i++) { 4254 if (PetscDefined(USE_DEBUG)) { 4255 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4256 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4257 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4258 } 4259 } 4260 nnz = Ii[i + 1] - Ii[i]; 4261 Iii = Ii[i]; 4262 ldi = ld[i]; 4263 md = Adi[i + 1] - Adi[i]; 4264 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4265 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4266 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4267 ad += md; 4268 ao += nnz - md; 4269 } 4270 nooffprocentries = mat->nooffprocentries; 4271 mat->nooffprocentries = PETSC_TRUE; 4272 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4273 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4274 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4275 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4276 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4277 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4278 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4279 mat->nooffprocentries = nooffprocentries; 4280 PetscFunctionReturn(PETSC_SUCCESS); 4281 } 4282 4283 /*@ 4284 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4285 4286 Collective 4287 4288 Input Parameters: 4289 + mat - the matrix 4290 - v - matrix values, stored by row 4291 4292 Level: intermediate 4293 4294 Notes: 4295 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4296 4297 The column indices in the call 
to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4298 4299 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4300 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4301 @*/ 4302 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4303 { 4304 PetscInt nnz, i, m; 4305 PetscBool nooffprocentries; 4306 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4307 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4308 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4309 PetscScalar *ad, *ao; 4310 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4311 PetscInt ldi, Iii, md; 4312 PetscInt *ld = Aij->ld; 4313 4314 PetscFunctionBegin; 4315 m = mat->rmap->n; 4316 4317 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4318 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4319 Iii = 0; 4320 for (i = 0; i < m; i++) { 4321 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4322 ldi = ld[i]; 4323 md = Adi[i + 1] - Adi[i]; 4324 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4325 ad += md; 4326 if (ao) { 4327 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4328 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4329 ao += nnz - md; 4330 } 4331 Iii += nnz; 4332 } 4333 nooffprocentries = mat->nooffprocentries; 4334 mat->nooffprocentries = PETSC_TRUE; 4335 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4336 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4337 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4338 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4339 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4340 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4341 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4342 mat->nooffprocentries = nooffprocentries; 4343 
PetscFunctionReturn(PETSC_SUCCESS); 4344 } 4345 4346 /*@C 4347 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4348 (the default parallel PETSc format). For good matrix assembly performance 4349 the user should preallocate the matrix storage by setting the parameters 4350 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4351 4352 Collective 4353 4354 Input Parameters: 4355 + comm - MPI communicator 4356 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4357 This value should be the same as the local size used in creating the 4358 y vector for the matrix-vector product y = Ax. 4359 . n - This value should be the same as the local size used in creating the 4360 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4361 calculated if N is given) For square matrices n is almost always m. 4362 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4363 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4364 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4365 (same value is used for all local rows) 4366 . d_nnz - array containing the number of nonzeros in the various rows of the 4367 DIAGONAL portion of the local submatrix (possibly different for each row) 4368 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4369 The size of this array is equal to the number of local rows, i.e 'm'. 4370 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4371 submatrix (same value is used for all local rows). 4372 - o_nnz - array containing the number of nonzeros in the various rows of the 4373 OFF-DIAGONAL portion of the local submatrix (possibly different for 4374 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4375 structure. The size of this array is equal to the number 4376 of local rows, i.e 'm'. 4377 4378 Output Parameter: 4379 . 
A - the matrix 4380 4381 Options Database Keys: 4382 + -mat_no_inode - Do not use inodes 4383 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4384 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4385 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4386 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4387 4388 Level: intermediate 4389 4390 Notes: 4391 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4392 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4393 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4394 4395 If the *_nnz parameter is given then the *_nz parameter is ignored 4396 4397 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4398 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4399 storage requirements for this matrix. 4400 4401 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4402 processor than it must be used on all processors that share the object for 4403 that argument. 4404 4405 The user MUST specify either the local or global matrix dimensions 4406 (possibly both). 4407 4408 The parallel matrix is partitioned across processors such that the 4409 first m0 rows belong to process 0, the next m1 rows belong to 4410 process 1, the next m2 rows belong to process 2 etc.. where 4411 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4412 values corresponding to [m x N] submatrix. 4413 4414 The columns are logically partitioned with the n0 columns belonging 4415 to 0th partition, the next n1 columns belonging to the next 4416 partition etc.. where n0,n1,n2... are the input parameter 'n'. 
4417 4418 The DIAGONAL portion of the local submatrix on any given processor 4419 is the submatrix corresponding to the rows and columns m,n 4420 corresponding to the given processor. i.e diagonal matrix on 4421 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4422 etc. The remaining portion of the local submatrix [m x (N-n)] 4423 constitute the OFF-DIAGONAL portion. The example below better 4424 illustrates this concept. 4425 4426 For a square global matrix we define each processor's diagonal portion 4427 to be its local rows and the corresponding columns (a square submatrix); 4428 each processor's off-diagonal portion encompasses the remainder of the 4429 local matrix (a rectangular submatrix). 4430 4431 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4432 4433 When calling this routine with a single process communicator, a matrix of 4434 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4435 type of communicator, use the construction mechanism 4436 .vb 4437 MatCreate(..., &A); 4438 MatSetType(A, MATMPIAIJ); 4439 MatSetSizes(A, m, n, M, N); 4440 MatMPIAIJSetPreallocation(A, ...); 4441 .ve 4442 4443 By default, this format uses inodes (identical nodes) when possible. 4444 We search for consecutive rows with the same nonzero structure, thereby 4445 reusing matrix information to achieve increased efficiency. 4446 4447 Example Usage: 4448 Consider the following 8x8 matrix with 34 non-zero values, that is 4449 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4450 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4451 as follows 4452 4453 .vb 4454 1 2 0 | 0 3 0 | 0 4 4455 Proc0 0 5 6 | 7 0 0 | 8 0 4456 9 0 10 | 11 0 0 | 12 0 4457 ------------------------------------- 4458 13 0 14 | 15 16 17 | 0 0 4459 Proc1 0 18 0 | 19 20 21 | 0 0 4460 0 0 0 | 22 23 0 | 24 0 4461 ------------------------------------- 4462 Proc2 25 26 27 | 0 0 28 | 29 0 4463 30 0 0 | 31 32 33 | 0 34 4464 .ve 4465 4466 This can be represented as a collection of submatrices as 4467 4468 .vb 4469 A B C 4470 D E F 4471 G H I 4472 .ve 4473 4474 Where the submatrices A,B,C are owned by proc0, D,E,F are 4475 owned by proc1, G,H,I are owned by proc2. 4476 4477 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4478 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4479 The 'M','N' parameters are 8,8, and have the same values on all procs. 4480 4481 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4482 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4483 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4484 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4485 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4486 matrix, and [DF] as another `MATSEQAIJ` matrix. 4487 4488 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4489 allocated for every row of the local diagonal submatrix, and `o_nz` 4490 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4491 One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local 4492 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4493 In this case, the values of `d_nz`,`o_nz` are 4494 .vb 4495 proc0 dnz = 2, o_nz = 2 4496 proc1 dnz = 3, o_nz = 2 4497 proc2 dnz = 1, o_nz = 4 4498 .ve 4499 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc.
This 4500 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4501 for proc2. i.e we are using 12+15+10=37 storage locations to store 4502 34 values. 4503 4504 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4505 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4506 In the above case the values for `d_nnz`, `o_nnz` are 4507 .vb 4508 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4509 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4510 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4511 .ve 4512 Here the space allocated is sum of all the above values i.e 34, and 4513 hence pre-allocation is perfect. 4514 4515 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4516 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4517 @*/ 4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4519 { 4520 PetscMPIInt size; 4521 4522 PetscFunctionBegin; 4523 PetscCall(MatCreate(comm, A)); 4524 PetscCall(MatSetSizes(*A, m, n, M, N)); 4525 PetscCallMPI(MPI_Comm_size(comm, &size)); 4526 if (size > 1) { 4527 PetscCall(MatSetType(*A, MATMPIAIJ)); 4528 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4529 } else { 4530 PetscCall(MatSetType(*A, MATSEQAIJ)); 4531 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4532 } 4533 PetscFunctionReturn(PETSC_SUCCESS); 4534 } 4535 4536 /*MC 4537 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4538 4539 Synopsis: 4540 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4541 4542 Not Collective 4543 4544 Input Parameter: 4545 . A - the `MATMPIAIJ` matrix 4546 4547 Output Parameters: 4548 + Ad - the diagonal portion of the matrix 4549 .
Ao - the off-diagonal portion of the matrix 4550 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4551 - ierr - error code 4552 4553 Level: advanced 4554 4555 Note: 4556 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4557 4558 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4559 M*/ 4560 4561 /*MC 4562 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4563 4564 Synopsis: 4565 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4566 4567 Not Collective 4568 4569 Input Parameters: 4570 + A - the `MATMPIAIJ` matrix 4571 . Ad - the diagonal portion of the matrix 4572 . Ao - the off-diagonal portion of the matrix 4573 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4574 - ierr - error code 4575 4576 Level: advanced 4577 4578 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4579 M*/ 4580 4581 /*@C 4582 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4583 4584 Not Collective 4585 4586 Input Parameter: 4587 . A - The `MATMPIAIJ` matrix 4588 4589 Output Parameters: 4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4592 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4593 4594 Level: intermediate 4595 4596 Note: 4597 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4598 in `Ad` are in [0, Nc) where Nc is the number of local columns. 
The columns in `Ao` are in [0, Nco), where Nco is 4599 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4600 local column numbers to global column numbers in the original matrix. 4601 4602 Fortran Notes: 4603 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4604 4605 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4606 @*/ 4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4608 { 4609 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4610 PetscBool flg; 4611 4612 PetscFunctionBegin; 4613 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4614 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4615 if (Ad) *Ad = a->A; 4616 if (Ao) *Ao = a->B; 4617 if (colmap) *colmap = a->garray; 4618 PetscFunctionReturn(PETSC_SUCCESS); 4619 } 4620 4621 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4622 { 4623 PetscInt m, N, i, rstart, nnz, Ii; 4624 PetscInt *indx; 4625 PetscScalar *values; 4626 MatType rootType; 4627 4628 PetscFunctionBegin; 4629 PetscCall(MatGetSize(inmat, &m, &N)); 4630 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4631 PetscInt *dnz, *onz, sum, bs, cbs; 4632 4633 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4634 /* Check sum(n) = N */ 4635 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4636 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4637 4638 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4639 rstart -= m; 4640 4641 MatPreallocateBegin(comm,
m, n, dnz, onz); 4642 for (i = 0; i < m; i++) { 4643 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4644 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4645 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4646 } 4647 4648 PetscCall(MatCreate(comm, outmat)); 4649 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4650 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4651 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4652 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4653 PetscCall(MatSetType(*outmat, rootType)); 4654 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4655 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4656 MatPreallocateEnd(dnz, onz); 4657 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4658 } 4659 4660 /* numeric phase */ 4661 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4662 for (i = 0; i < m; i++) { 4663 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4664 Ii = i + rstart; 4665 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4666 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4667 } 4668 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4669 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4670 PetscFunctionReturn(PETSC_SUCCESS); 4671 } 4672 4673 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4674 { 4675 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4676 4677 PetscFunctionBegin; 4678 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4679 PetscCall(PetscFree(merge->id_r)); 4680 PetscCall(PetscFree(merge->len_s)); 4681 PetscCall(PetscFree(merge->len_r)); 4682 PetscCall(PetscFree(merge->bi)); 4683 PetscCall(PetscFree(merge->bj)); 4684 PetscCall(PetscFree(merge->buf_ri[0])); 4685 PetscCall(PetscFree(merge->buf_ri)); 4686 PetscCall(PetscFree(merge->buf_rj[0])); 4687 PetscCall(PetscFree(merge->buf_rj)); 4688 
PetscCall(PetscFree(merge->coi)); 4689 PetscCall(PetscFree(merge->coj)); 4690 PetscCall(PetscFree(merge->owners_co)); 4691 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4692 PetscCall(PetscFree(merge)); 4693 PetscFunctionReturn(PETSC_SUCCESS); 4694 } 4695 4696 #include <../src/mat/utils/freespace.h> 4697 #include <petscbt.h> 4698 4699 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4700 { 4701 MPI_Comm comm; 4702 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4703 PetscMPIInt size, rank, taga, *len_s; 4704 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4705 PetscInt proc, m; 4706 PetscInt **buf_ri, **buf_rj; 4707 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4708 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4709 MPI_Request *s_waits, *r_waits; 4710 MPI_Status *status; 4711 const MatScalar *aa, *a_a; 4712 MatScalar **abuf_r, *ba_i; 4713 Mat_Merge_SeqsToMPI *merge; 4714 PetscContainer container; 4715 4716 PetscFunctionBegin; 4717 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4718 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4719 4720 PetscCallMPI(MPI_Comm_size(comm, &size)); 4721 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4722 4723 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4724 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4725 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4726 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4727 aa = a_a; 4728 4729 bi = merge->bi; 4730 bj = merge->bj; 4731 buf_ri = merge->buf_ri; 4732 buf_rj = merge->buf_rj; 4733 4734 PetscCall(PetscMalloc1(size, &status)); 4735 owners = merge->rowmap->range; 4736 len_s = merge->len_s; 4737 4738 /* send and recv matrix values */ 4739 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4740 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, 
merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4741 4742 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4743 for (proc = 0, k = 0; proc < size; proc++) { 4744 if (!len_s[proc]) continue; 4745 i = owners[proc]; 4746 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4747 k++; 4748 } 4749 4750 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4751 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4752 PetscCall(PetscFree(status)); 4753 4754 PetscCall(PetscFree(s_waits)); 4755 PetscCall(PetscFree(r_waits)); 4756 4757 /* insert mat values of mpimat */ 4758 PetscCall(PetscMalloc1(N, &ba_i)); 4759 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4760 4761 for (k = 0; k < merge->nrecv; k++) { 4762 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4763 nrows = *(buf_ri_k[k]); 4764 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4765 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4766 } 4767 4768 /* set values of ba */ 4769 m = merge->rowmap->n; 4770 for (i = 0; i < m; i++) { 4771 arow = owners[rank] + i; 4772 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4773 bnzi = bi[i + 1] - bi[i]; 4774 PetscCall(PetscArrayzero(ba_i, bnzi)); 4775 4776 /* add local non-zero vals of this proc's seqmat into ba */ 4777 anzi = ai[arow + 1] - ai[arow]; 4778 aj = a->j + ai[arow]; 4779 aa = a_a + ai[arow]; 4780 nextaj = 0; 4781 for (j = 0; nextaj < anzi; j++) { 4782 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4783 ba_i[j] += aa[nextaj++]; 4784 } 4785 } 4786 4787 /* add received vals into ba */ 4788 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4789 /* i-th row */ 4790 if (i == *nextrow[k]) { 4791 anzi = *(nextai[k] + 1) - *nextai[k]; 4792 aj = buf_rj[k] + *(nextai[k]); 4793 aa = abuf_r[k] + 
*(nextai[k]); 4794 nextaj = 0; 4795 for (j = 0; nextaj < anzi; j++) { 4796 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4797 ba_i[j] += aa[nextaj++]; 4798 } 4799 } 4800 nextrow[k]++; 4801 nextai[k]++; 4802 } 4803 } 4804 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4805 } 4806 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4807 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4808 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4809 4810 PetscCall(PetscFree(abuf_r[0])); 4811 PetscCall(PetscFree(abuf_r)); 4812 PetscCall(PetscFree(ba_i)); 4813 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4814 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4815 PetscFunctionReturn(PETSC_SUCCESS); 4816 } 4817 4818 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4819 { 4820 Mat B_mpi; 4821 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4822 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4823 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4824 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4825 PetscInt len, proc, *dnz, *onz, bs, cbs; 4826 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4827 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4828 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4829 MPI_Status *status; 4830 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4831 PetscBT lnkbt; 4832 Mat_Merge_SeqsToMPI *merge; 4833 PetscContainer container; 4834 4835 PetscFunctionBegin; 4836 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4837 4838 /* make sure it is a PETSc comm */ 4839 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4840 PetscCallMPI(MPI_Comm_size(comm, &size)); 4841 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4842 4843 PetscCall(PetscNew(&merge)); 4844 PetscCall(PetscMalloc1(size, &status)); 4845 4846 /* determine row ownership */ 
4847 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4848 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4849 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4850 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4851 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4852 PetscCall(PetscMalloc1(size, &len_si)); 4853 PetscCall(PetscMalloc1(size, &merge->len_s)); 4854 4855 m = merge->rowmap->n; 4856 owners = merge->rowmap->range; 4857 4858 /* determine the number of messages to send, their lengths */ 4859 len_s = merge->len_s; 4860 4861 len = 0; /* length of buf_si[] */ 4862 merge->nsend = 0; 4863 for (proc = 0; proc < size; proc++) { 4864 len_si[proc] = 0; 4865 if (proc == rank) { 4866 len_s[proc] = 0; 4867 } else { 4868 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4869 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4870 } 4871 if (len_s[proc]) { 4872 merge->nsend++; 4873 nrows = 0; 4874 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4875 if (ai[i + 1] > ai[i]) nrows++; 4876 } 4877 len_si[proc] = 2 * (nrows + 1); 4878 len += len_si[proc]; 4879 } 4880 } 4881 4882 /* determine the number and length of messages to receive for ij-structure */ 4883 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4884 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4885 4886 /* post the Irecv of j-structure */ 4887 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4888 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4889 4890 /* post the Isend of j-structure */ 4891 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4892 4893 for (proc = 0, k = 0; proc < size; proc++) { 4894 if (!len_s[proc]) continue; 4895 i = owners[proc]; 4896 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4897 k++; 4898 } 4899 4900 
/* receives and sends of j-structure are complete */ 4901 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4902 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4903 4904 /* send and recv i-structure */ 4905 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4906 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4907 4908 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4909 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4910 for (proc = 0, k = 0; proc < size; proc++) { 4911 if (!len_s[proc]) continue; 4912 /* form outgoing message for i-structure: 4913 buf_si[0]: nrows to be sent 4914 [1:nrows]: row index (global) 4915 [nrows+1:2*nrows+1]: i-structure index 4916 */ 4917 nrows = len_si[proc] / 2 - 1; 4918 buf_si_i = buf_si + nrows + 1; 4919 buf_si[0] = nrows; 4920 buf_si_i[0] = 0; 4921 nrows = 0; 4922 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4923 anzi = ai[i + 1] - ai[i]; 4924 if (anzi) { 4925 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4926 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4927 nrows++; 4928 } 4929 } 4930 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4931 k++; 4932 buf_si += len_si[proc]; 4933 } 4934 4935 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4936 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4937 4938 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4939 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4940 4941 PetscCall(PetscFree(len_si)); 4942 PetscCall(PetscFree(len_ri)); 4943 PetscCall(PetscFree(rj_waits)); 4944 PetscCall(PetscFree2(si_waits, sj_waits)); 4945 PetscCall(PetscFree(ri_waits)); 4946 PetscCall(PetscFree(buf_s)); 4947 
PetscCall(PetscFree(status)); 4948 4949 /* compute a local seq matrix in each processor */ 4950 /* allocate bi array and free space for accumulating nonzero column info */ 4951 PetscCall(PetscMalloc1(m + 1, &bi)); 4952 bi[0] = 0; 4953 4954 /* create and initialize a linked list */ 4955 nlnk = N + 1; 4956 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4957 4958 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4959 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4960 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4961 4962 current_space = free_space; 4963 4964 /* determine symbolic info for each local row */ 4965 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4966 4967 for (k = 0; k < merge->nrecv; k++) { 4968 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4969 nrows = *buf_ri_k[k]; 4970 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4971 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4972 } 4973 4974 MatPreallocateBegin(comm, m, n, dnz, onz); 4975 len = 0; 4976 for (i = 0; i < m; i++) { 4977 bnzi = 0; 4978 /* add local non-zero cols of this proc's seqmat into lnk */ 4979 arow = owners[rank] + i; 4980 anzi = ai[arow + 1] - ai[arow]; 4981 aj = a->j + ai[arow]; 4982 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4983 bnzi += nlnk; 4984 /* add received col data into lnk */ 4985 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4986 if (i == *nextrow[k]) { /* i-th row */ 4987 anzi = *(nextai[k] + 1) - *nextai[k]; 4988 aj = buf_rj[k] + *nextai[k]; 4989 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4990 bnzi += nlnk; 4991 nextrow[k]++; 4992 nextai[k]++; 4993 } 4994 } 4995 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4996 4997 /* if free space is not available, make more free space */ 4998 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4999 /* copy data into free space, then initialize lnk */ 5000 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5001 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5002 5003 current_space->array += bnzi; 5004 current_space->local_used += bnzi; 5005 current_space->local_remaining -= bnzi; 5006 5007 bi[i + 1] = bi[i] + bnzi; 5008 } 5009 5010 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5011 5012 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5013 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5014 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5015 5016 /* create symbolic parallel matrix B_mpi */ 5017 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5018 PetscCall(MatCreate(comm, &B_mpi)); 5019 if (n == PETSC_DECIDE) { 5020 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5021 } else { 5022 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5023 } 5024 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5025 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5026 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5027 MatPreallocateEnd(dnz, onz); 5028 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5029 5030 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5031 B_mpi->assembled = PETSC_FALSE; 5032 merge->bi = bi; 5033 merge->bj = bj; 5034 merge->buf_ri = buf_ri; 5035 merge->buf_rj = buf_rj; 5036 merge->coi = NULL; 5037 merge->coj = NULL; 5038 merge->owners_co = NULL; 5039 5040 PetscCall(PetscCommDestroy(&comm)); 5041 5042 /* attach the supporting struct to B_mpi for reuse */ 5043 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5044 PetscCall(PetscContainerSetPointer(container, merge)); 5045 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5046 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5047 PetscCall(PetscContainerDestroy(&container)); 5048 *mpimat = B_mpi; 5049 5050 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5051 PetscFunctionReturn(PETSC_SUCCESS); 5052 } 5053 5054 /*@C 5055 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5056 matrices from each processor 5057 5058 Collective 5059 5060 Input Parameters: 5061 + comm - the communicators the parallel matrix will live on 5062 . seqmat - the input sequential matrices 5063 . m - number of local rows (or `PETSC_DECIDE`) 5064 . n - number of local columns (or `PETSC_DECIDE`) 5065 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5066 5067 Output Parameter: 5068 . mpimat - the parallel matrix generated 5069 5070 Level: advanced 5071 5072 Note: 5073 The dimensions of the sequential matrix in each processor MUST be the same. 5074 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5075 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5076 5077 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5078 @*/ 5079 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5080 { 5081 PetscMPIInt size; 5082 5083 PetscFunctionBegin; 5084 PetscCallMPI(MPI_Comm_size(comm, &size)); 5085 if (size == 1) { 5086 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5087 if (scall == MAT_INITIAL_MATRIX) { 5088 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5089 } else { 5090 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5091 } 5092 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5093 PetscFunctionReturn(PETSC_SUCCESS); 5094 } 5095 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5096 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5097 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5098 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5099 PetscFunctionReturn(PETSC_SUCCESS); 5100 } 5101 5102 /*@ 5103 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5104 5105 Not Collective 5106 5107 Input Parameter: 5108 . A - the matrix 5109 5110 Output Parameter: 5111 . A_loc - the local sequential matrix generated 5112 5113 Level: developer 5114 5115 Notes: 5116 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5117 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5118 `n` is the global column count obtained with `MatGetSize()` 5119 5120 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5121 5122 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5123 5124 Destroy the matrix with `MatDestroy()` 5125 5126 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5127 @*/ 5128 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5129 { 5130 PetscBool mpi; 5131 5132 PetscFunctionBegin; 5133 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5134 if (mpi) { 5135 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5136 } else { 5137 *A_loc = A; 5138 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5139 } 5140 PetscFunctionReturn(PETSC_SUCCESS); 5141 } 5142 5143 /*@ 5144 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5145 5146 Not Collective 5147 5148 Input Parameters: 5149 + A - the matrix 5150 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5151 5152 Output Parameter: 5153 . A_loc - the local sequential matrix generated 5154 5155 Level: developer 5156 5157 Notes: 5158 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5159 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5160 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5161 5162 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5163 5164 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5165 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5166 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5167 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and its derived types (type-name prefix match) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already is the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba (and aj/bj below) are running cursors advanced row by row over the
     diagonal (A) and off-diagonal (B) parts; do not reorder these loops */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    /* row i of the result holds all diagonal plus all off-diagonal entries of row i */
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* columns are emitted in ascending global order: off-diagonal entries with
         global column < cstart first, then the diagonal block, then the rest */
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero pattern assumed unchanged: only refill the value array, walking
       the same diag-first/off-diag split as the INITIAL branch */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ`
matrix by taking all its local rows and putting them into a sequential matrix with 5278 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5279 5280 Not Collective 5281 5282 Input Parameters: 5283 + A - the matrix 5284 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5285 5286 Output Parameters: 5287 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5288 - A_loc - the local sequential matrix generated 5289 5290 Level: developer 5291 5292 Note: 5293 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5294 part, then those associated with the off-diagonal part (in its local ordering) 5295 5296 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5297 @*/ 5298 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5299 { 5300 Mat Ao, Ad; 5301 const PetscInt *cmap; 5302 PetscMPIInt size; 5303 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5304 5305 PetscFunctionBegin; 5306 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5307 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5308 if (size == 1) { 5309 if (scall == MAT_INITIAL_MATRIX) { 5310 PetscCall(PetscObjectReference((PetscObject)Ad)); 5311 *A_loc = Ad; 5312 } else if (scall == MAT_REUSE_MATRIX) { 5313 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5314 } 5315 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5316 PetscFunctionReturn(PETSC_SUCCESS); 5317 } 5318 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5319 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5320 if (f) { 5321 PetscCall((*f)(A, scall, glob, A_loc)); 5322 } else { 5323 
Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5324 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5325 Mat_SeqAIJ *c; 5326 PetscInt *ai = a->i, *aj = a->j; 5327 PetscInt *bi = b->i, *bj = b->j; 5328 PetscInt *ci, *cj; 5329 const PetscScalar *aa, *ba; 5330 PetscScalar *ca; 5331 PetscInt i, j, am, dn, on; 5332 5333 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5334 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5335 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5336 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5337 if (scall == MAT_INITIAL_MATRIX) { 5338 PetscInt k; 5339 PetscCall(PetscMalloc1(1 + am, &ci)); 5340 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5341 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5342 ci[0] = 0; 5343 for (i = 0, k = 0; i < am; i++) { 5344 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5345 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5346 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5347 /* diagonal portion of A */ 5348 for (j = 0; j < ncols_d; j++, k++) { 5349 cj[k] = *aj++; 5350 ca[k] = *aa++; 5351 } 5352 /* off-diagonal portion of A */ 5353 for (j = 0; j < ncols_o; j++, k++) { 5354 cj[k] = dn + *bj++; 5355 ca[k] = *ba++; 5356 } 5357 } 5358 /* put together the new matrix */ 5359 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5360 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5361 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5362 c = (Mat_SeqAIJ *)(*A_loc)->data; 5363 c->free_a = PETSC_TRUE; 5364 c->free_ij = PETSC_TRUE; 5365 c->nonew = 0; 5366 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5367 } else if (scall == MAT_REUSE_MATRIX) { 5368 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5369 for (i = 0; i < am; i++) { 5370 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5371 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5372 /* diagonal portion of A */ 5373 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5374 /* off-diagonal portion of A */ 5375 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5376 } 5377 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5378 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5379 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5380 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5381 if (glob) { 5382 PetscInt cst, *gidx; 5383 5384 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5385 PetscCall(PetscMalloc1(dn + on, &gidx)); 5386 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5387 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5388 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5389 } 5390 } 5391 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5392 PetscFunctionReturn(PETSC_SUCCESS); 5393 } 5394 5395 /*@C 5396 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5397 5398 Not Collective 5399 5400 Input Parameters: 5401 + A - the matrix 5402 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5403 . row - index set of rows to extract (or `NULL`) 5404 - col - index set of columns to extract (or `NULL`) 5405 5406 Output Parameter: 5407 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  /* default row set: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  /* default column set: all columns with a local nonzero, in ascending global order
     (off-diagonal columns below the local range, then the local range, then the rest) */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  /* for reuse, MatCreateSubMatrices() expects the previously created submatrix in an array */
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* only destroy the index sets created here; caller-supplied ones are untouched */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root (row of P) nonzero counts and running offsets, interleaved as
     [diag, offdiag] pairs so one MPIU_2INT broadcast moves both at once */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    /* ncol = max row length; used only as the column dimension of the result */
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs: one leaf per nonzero entry, mapping P's CSR data
     directly into P_oth's CSR arrays */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; pd->j is translated IN PLACE and
     shifted back below after the broadcast has been completed */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place translation of po->j as well */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      /* key = global off-diagonal column of A scaled down by dof (MAIJ support) */
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5680 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5681 PetscCall(PetscCalloc1(htsize, &rowindices)); 5682 off = 0; 5683 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5684 PetscCall(PetscHMapIDestroy(&hamp)); 5685 PetscCall(PetscSortInt(htsize, rowindices)); 5686 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5687 /* In case, the matrix was already created but users want to recreate the matrix */ 5688 PetscCall(MatDestroy(P_oth)); 5689 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5690 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5691 PetscCall(ISDestroy(&map)); 5692 PetscCall(ISDestroy(&rows)); 5693 } else if (reuse == MAT_REUSE_MATRIX) { 5694 /* If matrix was already created, we simply update values using SF objects 5695 * that as attached to the matrix earlier. 
5696 */ 5697 const PetscScalar *pd_a, *po_a; 5698 5699 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5700 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5701 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5702 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5703 /* Update values in place */ 5704 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5705 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5706 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5707 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5708 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5709 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5710 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5711 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5712 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5713 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5714 PetscFunctionReturn(PETSC_SUCCESS); 5715 } 5716 5717 /*@C 5718 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5719 5720 Collective 5721 5722 Input Parameters: 5723 + A - the first matrix in `MATMPIAIJ` format 5724 . B - the second matrix in `MATMPIAIJ` format 5725 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5726 5727 Output Parameters: 5728 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5729 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* A's local column range must coincide with B's local row range for A*B to make sense */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    /* Build the sorted list of global rows of B we need: off-diagonal columns below the
       local range, then all local columns, then off-diagonal columns above the range.
       garray is sorted, so the three pieces concatenate in ascending order. */
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of Mat for reuse; wrap *B_seq in one */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller (for later reuse) or destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5807 5808 Level: developer 5809 5810 */ 5811 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5812 { 5813 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5814 Mat_SeqAIJ *b_oth; 5815 VecScatter ctx; 5816 MPI_Comm comm; 5817 const PetscMPIInt *rprocs, *sprocs; 5818 const PetscInt *srow, *rstarts, *sstarts; 5819 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5820 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5821 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5822 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5823 PetscMPIInt size, tag, rank, nreqs; 5824 5825 PetscFunctionBegin; 5826 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5827 PetscCallMPI(MPI_Comm_size(comm, &size)); 5828 5829 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5830 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5831 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5832 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5833 5834 if (size == 1) { 5835 startsj_s = NULL; 5836 bufa_ptr = NULL; 5837 *B_oth = NULL; 5838 PetscFunctionReturn(PETSC_SUCCESS); 5839 } 5840 5841 ctx = a->Mvctx; 5842 tag = ((PetscObject)ctx)->tag; 5843 5844 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5845 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5846 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5847 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5848 PetscCall(PetscMalloc1(nreqs, &reqs)); 5849 rwaits = reqs; 5850 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5851 5852 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5853 if (scall == MAT_INITIAL_MATRIX) { 5854 /* i-array */ 5855 /* post receives */ 5856 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5857 for (i = 0; i < nrecvs; i++) { 5858 rowlen = rvalues + rstarts[i] * rbs; 5859 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5860 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5861 } 5862 5863 /* pack the outgoing message */ 5864 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5865 5866 sstartsj[0] = 0; 5867 rstartsj[0] = 0; 5868 len = 0; /* total length of j or a array to be sent */ 5869 if (nsends) { 5870 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5871 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5872 } 5873 for (i = 0; i < nsends; i++) { 5874 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5875 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5876 for (j = 0; j < nrows; j++) { 5877 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5878 for (l = 0; l < sbs; l++) { 5879 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5880 5881 rowlen[j * sbs + l] = ncols; 5882 5883 len += ncols; 5884 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5885 } 5886 k++; 5887 } 5888 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5889 5890 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5891 } 5892 /* recvs and sends of i-array are completed */ 5893 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5894 
PetscCall(PetscFree(svalues)); 5895 5896 /* allocate buffers for sending j and a arrays */ 5897 PetscCall(PetscMalloc1(len + 1, &bufj)); 5898 PetscCall(PetscMalloc1(len + 1, &bufa)); 5899 5900 /* create i-array of B_oth */ 5901 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5902 5903 b_othi[0] = 0; 5904 len = 0; /* total length of j or a array to be received */ 5905 k = 0; 5906 for (i = 0; i < nrecvs; i++) { 5907 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5908 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5909 for (j = 0; j < nrows; j++) { 5910 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5911 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5912 k++; 5913 } 5914 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5915 } 5916 PetscCall(PetscFree(rvalues)); 5917 5918 /* allocate space for j and a arrays of B_oth */ 5919 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5920 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5921 5922 /* j-array */ 5923 /* post receives of j-array */ 5924 for (i = 0; i < nrecvs; i++) { 5925 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5926 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5927 } 5928 5929 /* pack the outgoing message j-array */ 5930 if (nsends) k = sstarts[0]; 5931 for (i = 0; i < nsends; i++) { 5932 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5933 bufJ = bufj + sstartsj[i]; 5934 for (j = 0; j < nrows; j++) { 5935 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5936 for (ll = 0; ll < sbs; ll++) { 5937 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5938 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5939 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5940 } 5941 } 5942 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5943 } 5944 5945 /* recvs 
and sends of j-array are completed */ 5946 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5947 } else if (scall == MAT_REUSE_MATRIX) { 5948 sstartsj = *startsj_s; 5949 rstartsj = *startsj_r; 5950 bufa = *bufa_ptr; 5951 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5952 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5953 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5954 5955 /* a-array */ 5956 /* post receives of a-array */ 5957 for (i = 0; i < nrecvs; i++) { 5958 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5959 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5960 } 5961 5962 /* pack the outgoing message a-array */ 5963 if (nsends) k = sstarts[0]; 5964 for (i = 0; i < nsends; i++) { 5965 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5966 bufA = bufa + sstartsj[i]; 5967 for (j = 0; j < nrows; j++) { 5968 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5969 for (ll = 0; ll < sbs; ll++) { 5970 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5971 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5972 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5973 } 5974 } 5975 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5976 } 5977 /* recvs and sends of a-array are completed */ 5978 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5979 PetscCall(PetscFree(reqs)); 5980 5981 if (scall == MAT_INITIAL_MATRIX) { 5982 /* put together the new matrix */ 5983 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5984 5985 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5986 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5987 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5988 b_oth->free_a = PETSC_TRUE; 5989 b_oth->free_ij = PETSC_TRUE; 5990 b_oth->nonew = 0; 5991 5992 PetscCall(PetscFree(bufj)); 5993 if (!startsj_s || !bufa_ptr) { 5994 PetscCall(PetscFree2(sstartsj, rstartsj)); 5995 PetscCall(PetscFree(bufa_ptr)); 5996 } else { 5997 *startsj_s = sstartsj; 5998 *startsj_r = rstartsj; 5999 *bufa_ptr = bufa; 6000 } 6001 } else if (scall == MAT_REUSE_MATRIX) { 6002 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6003 } 6004 6005 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6006 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6007 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6008 PetscFunctionReturn(PETSC_SUCCESS); 6009 } 6010 6011 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6013 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6014 #if defined(PETSC_HAVE_MKL_SPARSE) 6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6016 #endif 6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6018 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6019 #if defined(PETSC_HAVE_ELEMENTAL) 6020 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6021 #endif 6022 #if defined(PETSC_HAVE_SCALAPACK) 6023 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6024 #endif 6025 #if defined(PETSC_HAVE_HYPRE) 6026 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6027 #endif 6028 #if defined(PETSC_HAVE_CUDA) 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes C = A * B by forming (B'*A')' since computing B*A directly is untenable

         n                p                 p
      [     ]          [     ]          [     ]
    m [  A  ]   *    n [  B  ]   =    m [  C  ]
      [     ]          [     ]          [     ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Transpose both operands, multiply in reversed order, then transpose the result back into C */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Reuse C's existing (symbolic) structure for the final transpose */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase for C = A(dense) * B(aij): sets C's sizes/type and installs the numeric kernel */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already some MPIDENSE flavor; otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product dispatch for the AB case: validates layouts and installs symbolic callbacks */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Entry point for MPIDense*MPIAIJ products; only MATPRODUCT_AB is handled here */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic sorted-merge: advance whichever side has the smaller column, skipping repeats
       via the jmap offsets; equal columns collapse into a single merged nonzero */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: total unique nonzeros seen so far */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row; also undo the PETSC_MAX_INT shift */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz;              /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Positions at or before imap[0] get the base offset jmap[0] (merged nonzeros absent from the set have zero repeats) */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the COO assembly metadata attached to an MPIAIJ matrix via PetscContainer */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate an MPIAIJ matrix from COO (i,j) input; coo_i/coo_j are permuted/modified in place.
   This head of the routine resets any previous assembly state and sets up the layouts. */
PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem; /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Discard any previous off-diagonal mapping/scatter; they will be rebuilt for the new pattern */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
6414 PetscCall(PetscLayoutSetUp(mat->cmap)); 6415 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6416 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6417 PetscCall(MatGetLocalSize(mat, &m, &n)); 6418 PetscCall(MatGetSize(mat, &M, &N)); 6419 6420 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6421 /* entries come first, then local rows, then remote rows. */ 6422 PetscCount n1 = coo_n, *perm1; 6423 PetscInt *i1 = coo_i, *j1 = coo_j; 6424 6425 PetscCall(PetscMalloc1(n1, &perm1)); 6426 for (k = 0; k < n1; k++) perm1[k] = k; 6427 6428 /* Manipulate indices so that entries with negative row or col indices will have smallest 6429 row indices, local entries will have greater but negative row indices, and remote entries 6430 will have positive row indices. 6431 */ 6432 for (k = 0; k < n1; k++) { 6433 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6434 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6435 else { 6436 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6437 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6438 } 6439 } 6440 6441 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6442 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6443 6444 /* Advance k to the first entry we need to take care of */ 6445 for (k = 0; k < n1; k++) 6446 if (i1[k] > PETSC_MIN_INT) break; 6447 PetscInt i1start = k; 6448 6449 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6450 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6451 6452 /* Send remote rows 
to their owner */ 6453 /* Find which rows should be sent to which remote ranks*/ 6454 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6455 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6456 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6457 const PetscInt *ranges; 6458 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6459 6460 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6461 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6462 for (k = rem; k < n1;) { 6463 PetscMPIInt owner; 6464 PetscInt firstRow, lastRow; 6465 6466 /* Locate a row range */ 6467 firstRow = i1[k]; /* first row of this owner */ 6468 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6469 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6470 6471 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6472 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6473 6474 /* All entries in [k,p) belong to this remote owner */ 6475 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6476 PetscMPIInt *sendto2; 6477 PetscInt *nentries2; 6478 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6479 6480 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6481 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6482 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6483 PetscCall(PetscFree2(sendto, nentries2)); 6484 sendto = sendto2; 6485 nentries = nentries2; 6486 maxNsend = maxNsend2; 6487 } 6488 sendto[nsend] = owner; 6489 nentries[nsend] = p - k; 6490 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6491 nsend++; 6492 k = p; 6493 } 6494 6495 /* Build 1st SF to know offsets on remote to send data */ 6496 PetscSF sf1; 6497 PetscInt nroots = 1, nroots2 = 0; 6498 PetscInt nleaves = nsend, nleaves2 = 0; 6499 PetscInt *offsets; 6500 PetscSFNode *iremote; 6501 6502 PetscCall(PetscSFCreate(comm, &sf1)); 6503 PetscCall(PetscMalloc1(nsend, &iremote)); 6504 PetscCall(PetscMalloc1(nsend, &offsets)); 6505 for (k = 0; k < nsend; k++) { 6506 iremote[k].rank = sendto[k]; 6507 iremote[k].index = 0; 6508 nleaves2 += nentries[k]; 6509 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6510 } 6511 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6512 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6513 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6514 PetscCall(PetscSFDestroy(&sf1)); 6515 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6516 6517 /* Build 2nd SF to send remote COOs to their owner */ 6518 PetscSF sf2; 6519 nroots = nroots2; 6520 nleaves = nleaves2; 6521 PetscCall(PetscSFCreate(comm, &sf2)); 6522 
PetscCall(PetscSFSetFromOptions(sf2)); 6523 PetscCall(PetscMalloc1(nleaves, &iremote)); 6524 p = 0; 6525 for (k = 0; k < nsend; k++) { 6526 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6527 for (q = 0; q < nentries[k]; q++, p++) { 6528 iremote[p].rank = sendto[k]; 6529 iremote[p].index = offsets[k] + q; 6530 } 6531 } 6532 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6533 6534 /* Send the remote COOs to their owner */ 6535 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6536 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6537 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6538 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6539 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6540 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6541 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6542 6543 PetscCall(PetscFree(offsets)); 6544 PetscCall(PetscFree2(sendto, nentries)); 6545 6546 /* Sort received COOs by row along with the permutation array */ 6547 for (k = 0; k < n2; k++) perm2[k] = k; 6548 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6549 6550 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6551 PetscCount *Cperm1; 6552 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6553 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6554 6555 /* Support for HYPRE matrices, kind of a hack. 
6556 Swap min column with diagonal so that diagonal values will go first */ 6557 PetscBool hypre; 6558 const char *name; 6559 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6560 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6561 if (hypre) { 6562 PetscInt *minj; 6563 PetscBT hasdiag; 6564 6565 PetscCall(PetscBTCreate(m, &hasdiag)); 6566 PetscCall(PetscMalloc1(m, &minj)); 6567 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6568 for (k = i1start; k < rem; k++) { 6569 if (j1[k] < cstart || j1[k] >= cend) continue; 6570 const PetscInt rindex = i1[k] - rstart; 6571 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6572 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6573 } 6574 for (k = 0; k < n2; k++) { 6575 if (j2[k] < cstart || j2[k] >= cend) continue; 6576 const PetscInt rindex = i2[k] - rstart; 6577 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6578 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6579 } 6580 for (k = i1start; k < rem; k++) { 6581 const PetscInt rindex = i1[k] - rstart; 6582 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6583 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6584 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6585 } 6586 for (k = 0; k < n2; k++) { 6587 const PetscInt rindex = i2[k] - rstart; 6588 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6589 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6590 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6591 } 6592 PetscCall(PetscBTDestroy(&hasdiag)); 6593 PetscCall(PetscFree(minj)); 6594 } 6595 6596 /* Split local COOs and received COOs into diag/offdiag portions */ 6597 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6598 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6599 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6600 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6601 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6602 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6603 6604 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6605 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6606 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6607 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6608 6609 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6610 PetscInt *Ai, *Bi; 6611 PetscInt *Aj, *Bj; 6612 6613 PetscCall(PetscMalloc1(m + 1, &Ai)); 6614 PetscCall(PetscMalloc1(m + 1, &Bi)); 6615 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6616 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6617 6618 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6619 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6620 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6621 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6622 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6623 6624 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6625 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6626 6627 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6628 /* expect nonzeros in A/B most likely have local contributing entries */ 6629 PetscInt Annz = Ai[m]; 6630 PetscInt Bnnz = Bi[m]; 6631 PetscCount *Ajmap1_new, *Bjmap1_new; 6632 6633 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6634 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6635 6636 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6637 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6638 
6639 PetscCall(PetscFree(Aimap1)); 6640 PetscCall(PetscFree(Ajmap1)); 6641 PetscCall(PetscFree(Bimap1)); 6642 PetscCall(PetscFree(Bjmap1)); 6643 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6644 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6645 PetscCall(PetscFree(perm1)); 6646 PetscCall(PetscFree3(i2, j2, perm2)); 6647 6648 Ajmap1 = Ajmap1_new; 6649 Bjmap1 = Bjmap1_new; 6650 6651 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6652 if (Annz < Annz1 + Annz2) { 6653 PetscInt *Aj_new; 6654 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6655 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6656 PetscCall(PetscFree(Aj)); 6657 Aj = Aj_new; 6658 } 6659 6660 if (Bnnz < Bnnz1 + Bnnz2) { 6661 PetscInt *Bj_new; 6662 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6663 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6664 PetscCall(PetscFree(Bj)); 6665 Bj = Bj_new; 6666 } 6667 6668 /* Create new submatrices for on-process and off-process coupling */ 6669 PetscScalar *Aa, *Ba; 6670 MatType rtype; 6671 Mat_SeqAIJ *a, *b; 6672 PetscObjectState state; 6673 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6674 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6675 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6676 if (cstart) { 6677 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6678 } 6679 PetscCall(MatDestroy(&mpiaij->A)); 6680 PetscCall(MatDestroy(&mpiaij->B)); 6681 PetscCall(MatGetRootType_Private(mat, &rtype)); 6682 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6683 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6684 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6685 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6686 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6687 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, 
MPI_SUM, PetscObjectComm((PetscObject)mat))); 6688 6689 a = (Mat_SeqAIJ *)mpiaij->A->data; 6690 b = (Mat_SeqAIJ *)mpiaij->B->data; 6691 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6692 a->free_a = b->free_a = PETSC_TRUE; 6693 a->free_ij = b->free_ij = PETSC_TRUE; 6694 6695 /* conversion must happen AFTER multiply setup */ 6696 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6697 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6698 PetscCall(VecDestroy(&mpiaij->lvec)); 6699 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6700 6701 // Put the COO struct in a container and then attach that to the matrix 6702 PetscCall(PetscMalloc1(1, &coo)); 6703 coo->n = coo_n; 6704 coo->sf = sf2; 6705 coo->sendlen = nleaves; 6706 coo->recvlen = nroots; 6707 coo->Annz = Annz; 6708 coo->Bnnz = Bnnz; 6709 coo->Annz2 = Annz2; 6710 coo->Bnnz2 = Bnnz2; 6711 coo->Atot1 = Atot1; 6712 coo->Atot2 = Atot2; 6713 coo->Btot1 = Btot1; 6714 coo->Btot2 = Btot2; 6715 coo->Ajmap1 = Ajmap1; 6716 coo->Aperm1 = Aperm1; 6717 coo->Bjmap1 = Bjmap1; 6718 coo->Bperm1 = Bperm1; 6719 coo->Aimap2 = Aimap2; 6720 coo->Ajmap2 = Ajmap2; 6721 coo->Aperm2 = Aperm2; 6722 coo->Bimap2 = Bimap2; 6723 coo->Bjmap2 = Bjmap2; 6724 coo->Bperm2 = Bperm2; 6725 coo->Cperm1 = Cperm1; 6726 // Allocate in preallocation. 
// If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  /* The matrix keeps the COO struct alive through the composed container */
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set values v[] into the matrix following the COO pattern previously recorded by
   MatSetPreallocationCOO_MPIAIJ() (stored in the composed "__PETSc_MatCOOStruct_Host" container).

   v     - array of scalars, one per COO entry given at preallocation time
   imode - INSERT_VALUES overwrites existing values; otherwise values are added

   Entries destined for remote ranks are packed via Cperm1 and shipped with the stored
   PetscSF, overlapping the communication with the local summation into A (diag) and
   B (offdiag).
*/
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  /* Unpack the maps built at preallocation: *jmap1/*perm1 fold local COO entries into
     nonzeros; *imap2/*jmap2/*perm2 fold received remote entries; Cperm1 selects entries
     to send */
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; these are always added (the INSERT case was
     already handled by zeroing the nonzero in the local pass above) */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the function
   table, and registers the composed-function implementations (conversions, preallocation,
   COO assembly, products) for this type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* install the MPIAIJ method table by struct copy */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific implementations; these mirror the PetscObjectComposeFunction(...,NULL)
     calls in MatDestroy_MPIAIJ() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.
n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The `i` and `j` indices are 0 based

   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
   communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user's arrays directly (no copy); A holds the diagonal block, B the off-diagonal block */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Entries are purely local, so assembly needs no stash communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Work data for the backend (GPU-capable) implementation of MatProduct for MPIAIJ */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for MatMatMPIAIJBACKEND, attached as the product-data destroy callback.
   Frees the intermediate matrices, the COO index/scalar buffers, and the SF. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with the SF's memory type, so free through PetscSFFree */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single backing allocations for all per-product index lists */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-aware) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* NULL idx: straight copy of the first n values */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: recompute the intermediate products,
   gather their values into COO buffers (on-process into coo_v, off-process into coo_w),
   ship off-process values through the SF, and insert everything into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym is only valid for the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporaries only feed later products; no COO contribution */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received off-process values are appended after the on-process ones in coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7120 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7121 /* a base offset; type-2: sparse with a local to global map table */ 7122 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7123 7124 MatProductType ptype; 7125 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7126 PetscMPIInt size; 7127 7128 PetscFunctionBegin; 7129 MatCheckProduct(C, 1); 7130 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7131 ptype = product->type; 7132 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7133 ptype = MATPRODUCT_AB; 7134 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7135 } 7136 switch (ptype) { 7137 case MATPRODUCT_AB: 7138 A = product->A; 7139 P = product->B; 7140 m = A->rmap->n; 7141 n = P->cmap->n; 7142 M = A->rmap->N; 7143 N = P->cmap->N; 7144 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7145 break; 7146 case MATPRODUCT_AtB: 7147 P = product->A; 7148 A = product->B; 7149 m = P->cmap->n; 7150 n = A->cmap->n; 7151 M = P->cmap->N; 7152 N = A->cmap->N; 7153 hasoffproc = PETSC_TRUE; 7154 break; 7155 case MATPRODUCT_PtAP: 7156 A = product->A; 7157 P = product->B; 7158 m = P->cmap->n; 7159 n = P->cmap->n; 7160 M = P->cmap->N; 7161 N = P->cmap->N; 7162 hasoffproc = PETSC_TRUE; 7163 break; 7164 default: 7165 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7166 } 7167 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7168 if (size == 1) hasoffproc = PETSC_FALSE; 7169 7170 /* defaults */ 7171 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7172 mp[i] = NULL; 7173 mptmp[i] = PETSC_FALSE; 7174 rmapt[i] = -1; 7175 cmapt[i] = -1; 7176 rmapa[i] = NULL; 7177 cmapa[i] = NULL; 7178 } 7179 7180 /* customization */ 
7181 PetscCall(PetscNew(&mmdata)); 7182 mmdata->reusesym = product->api_user; 7183 if (ptype == MATPRODUCT_AB) { 7184 if (product->api_user) { 7185 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7186 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7187 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7188 PetscOptionsEnd(); 7189 } else { 7190 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7191 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7192 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7193 PetscOptionsEnd(); 7194 } 7195 } else if (ptype == MATPRODUCT_PtAP) { 7196 if (product->api_user) { 7197 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7198 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7199 PetscOptionsEnd(); 7200 } else { 7201 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7202 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7203 PetscOptionsEnd(); 7204 } 7205 } 7206 a = (Mat_MPIAIJ *)A->data; 7207 p = (Mat_MPIAIJ *)P->data; 7208 PetscCall(MatSetSizes(C, m, n, M, N)); 7209 PetscCall(PetscLayoutSetUp(C->rmap)); 7210 PetscCall(PetscLayoutSetUp(C->cmap)); 7211 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7212 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7213 7214 cp = 0; 7215 switch (ptype) { 7216 case MATPRODUCT_AB: /* A * P */ 7217 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7218 7219 /* A_diag * P_local (merged or not) */ 7220 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7221 /* P is product->B */ 7222 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7223 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7224 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7225 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7226 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7227 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7228 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7229 mp[cp]->product->api_user = product->api_user; 7230 PetscCall(MatProductSetFromOptions(mp[cp])); 7231 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7232 PetscCall(ISGetIndices(glob, &globidx)); 7233 rmapt[cp] = 1; 7234 cmapt[cp] = 2; 7235 cmapa[cp] = globidx; 7236 mptmp[cp] = PETSC_FALSE; 7237 cp++; 7238 } else { /* A_diag * P_diag and A_diag * P_off */ 7239 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7240 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7241 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7242 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7243 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7244 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7245 mp[cp]->product->api_user = product->api_user; 7246 PetscCall(MatProductSetFromOptions(mp[cp])); 7247 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7248 rmapt[cp] = 1; 7249 cmapt[cp] = 1; 7250 mptmp[cp] = PETSC_FALSE; 7251 cp++; 7252 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7253 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7254 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7255 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7256 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7257 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7258 mp[cp]->product->api_user = product->api_user; 7259 PetscCall(MatProductSetFromOptions(mp[cp])); 7260 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7261 rmapt[cp] = 1; 7262 cmapt[cp] = 2; 7263 cmapa[cp] = p->garray; 7264 mptmp[cp] = PETSC_FALSE; 7265 cp++; 7266 } 7267 7268 /* A_off * P_other */ 7269 if (mmdata->P_oth) { 7270 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7271 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7272 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7273 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7274 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7275 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7276 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7277 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7278 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7279 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7280 mp[cp]->product->api_user = product->api_user; 7281 PetscCall(MatProductSetFromOptions(mp[cp])); 7282 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7283 rmapt[cp] = 1; 7284 cmapt[cp] = 2; 7285 cmapa[cp] = P_oth_idx; 7286 mptmp[cp] = PETSC_FALSE; 7287 cp++; 7288 } 7289 break; 7290 7291 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7292 /* A is product->B */ 7293 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7294 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7295 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7296 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7297 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7298 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7299 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7300 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7301 mp[cp]->product->api_user = product->api_user; 7302 PetscCall(MatProductSetFromOptions(mp[cp])); 7303 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7304 PetscCall(ISGetIndices(glob, &globidx)); 7305 rmapt[cp] = 2; 7306 rmapa[cp] = globidx; 7307 cmapt[cp] = 2; 7308 cmapa[cp] = globidx; 7309 mptmp[cp] = PETSC_FALSE; 7310 cp++; 7311 } else { 7312 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7313 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7314 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7315 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7316 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7317 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7318 mp[cp]->product->api_user = product->api_user; 7319 PetscCall(MatProductSetFromOptions(mp[cp])); 7320 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7321 PetscCall(ISGetIndices(glob, &globidx)); 7322 rmapt[cp] = 1; 7323 cmapt[cp] = 2; 7324 cmapa[cp] = globidx; 7325 mptmp[cp] = PETSC_FALSE; 7326 cp++; 7327 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7328 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7329 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7330 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7331 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7332 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7333 mp[cp]->product->api_user = product->api_user; 7334 PetscCall(MatProductSetFromOptions(mp[cp])); 7335 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7336 rmapt[cp] = 2; 7337 rmapa[cp] = p->garray; 
7338 cmapt[cp] = 2; 7339 cmapa[cp] = globidx; 7340 mptmp[cp] = PETSC_FALSE; 7341 cp++; 7342 } 7343 break; 7344 case MATPRODUCT_PtAP: 7345 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7346 /* P is product->B */ 7347 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7348 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7349 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7350 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7351 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7352 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7353 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7354 mp[cp]->product->api_user = product->api_user; 7355 PetscCall(MatProductSetFromOptions(mp[cp])); 7356 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7357 PetscCall(ISGetIndices(glob, &globidx)); 7358 rmapt[cp] = 2; 7359 rmapa[cp] = globidx; 7360 cmapt[cp] = 2; 7361 cmapa[cp] = globidx; 7362 mptmp[cp] = PETSC_FALSE; 7363 cp++; 7364 if (mmdata->P_oth) { 7365 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7366 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7367 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7368 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7369 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7370 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7371 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7372 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7373 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7374 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7375 mp[cp]->product->api_user = product->api_user; 7376 PetscCall(MatProductSetFromOptions(mp[cp])); 7377 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7378 
mptmp[cp] = PETSC_TRUE; 7379 cp++; 7380 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7381 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7382 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7383 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7384 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7385 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7386 mp[cp]->product->api_user = product->api_user; 7387 PetscCall(MatProductSetFromOptions(mp[cp])); 7388 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7389 rmapt[cp] = 2; 7390 rmapa[cp] = globidx; 7391 cmapt[cp] = 2; 7392 cmapa[cp] = P_oth_idx; 7393 mptmp[cp] = PETSC_FALSE; 7394 cp++; 7395 } 7396 break; 7397 default: 7398 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7399 } 7400 /* sanity check */ 7401 if (size > 1) 7402 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7403 7404 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7405 for (i = 0; i < cp; i++) { 7406 mmdata->mp[i] = mp[i]; 7407 mmdata->mptmp[i] = mptmp[i]; 7408 } 7409 mmdata->cp = cp; 7410 C->product->data = mmdata; 7411 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7412 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7413 7414 /* memory type */ 7415 mmdata->mtype = PETSC_MEMTYPE_HOST; 7416 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7417 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7418 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7419 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7420 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7421 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7422 
7423 /* prepare coo coordinates for values insertion */ 7424 7425 /* count total nonzeros of those intermediate seqaij Mats 7426 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7427 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7428 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7429 */ 7430 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7431 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7432 if (mptmp[cp]) continue; 7433 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7434 const PetscInt *rmap = rmapa[cp]; 7435 const PetscInt mr = mp[cp]->rmap->n; 7436 const PetscInt rs = C->rmap->rstart; 7437 const PetscInt re = C->rmap->rend; 7438 const PetscInt *ii = mm->i; 7439 for (i = 0; i < mr; i++) { 7440 const PetscInt gr = rmap[i]; 7441 const PetscInt nz = ii[i + 1] - ii[i]; 7442 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7443 else ncoo_oown += nz; /* this row is local */ 7444 } 7445 } else ncoo_d += mm->nz; 7446 } 7447 7448 /* 7449 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7450 7451 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7452 7453 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7454 7455 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7456 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7457 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7458 7459 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7460 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7461 */ 7462 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7463 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7464 7465 /* gather (i,j) of nonzeros inserted by remote procs */ 7466 if (hasoffproc) { 7467 PetscSF msf; 7468 PetscInt ncoo2, *coo_i2, *coo_j2; 7469 7470 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7471 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7472 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7473 7474 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7475 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7476 PetscInt *idxoff = mmdata->off[cp]; 7477 PetscInt *idxown = mmdata->own[cp]; 7478 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7479 const PetscInt *rmap = rmapa[cp]; 7480 const PetscInt *cmap = cmapa[cp]; 7481 const PetscInt *ii = mm->i; 7482 PetscInt *coi = coo_i + ncoo_o; 7483 PetscInt *coj = coo_j + ncoo_o; 7484 const PetscInt mr = mp[cp]->rmap->n; 7485 const PetscInt rs = C->rmap->rstart; 7486 const PetscInt re = C->rmap->rend; 7487 const PetscInt cs = C->cmap->rstart; 7488 for (i = 0; i < mr; i++) { 7489 const PetscInt *jj = mm->j + ii[i]; 7490 const PetscInt gr = rmap[i]; 7491 const PetscInt nz = ii[i + 1] - ii[i]; 7492 if (gr < rs || gr >= re) { /* this is an offproc row */ 7493 for (j = ii[i]; j < ii[i + 1]; j++) { 7494 *coi++ = gr; 7495 *idxoff++ = j; 7496 } 7497 if (!cmapt[cp]) { /* already global */ 7498 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7499 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7500 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7501 } else { /* offdiag */ 7502 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7503 } 7504 ncoo_o += nz; 7505 } else { /* this is a local row */ 7506 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7507 } 7508 } 7509 } 7510 mmdata->off[cp + 1] = idxoff; 7511 mmdata->own[cp + 1] = idxown; 7512 } 7513 7514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7515 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7516 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7517 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7518 ncoo = ncoo_d + ncoo_oown + ncoo2; 7519 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7520 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7521 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7522 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7523 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7524 PetscCall(PetscFree2(coo_i, coo_j)); 7525 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7526 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7527 coo_i = coo_i2; 7528 coo_j = coo_j2; 7529 } else { /* no offproc values insertion */ 7530 ncoo = ncoo_d; 7531 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7532 7533 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7534 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7535 PetscCall(PetscSFSetUp(mmdata->sf)); 7536 } 7537 mmdata->hasoffproc = hasoffproc; 7538 7539 /* gather (i,j) of nonzeros inserted locally */ 7540 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7541 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7542 PetscInt *coi = coo_i + ncoo_d; 7543 PetscInt *coj = coo_j + ncoo_d; 7544 const PetscInt *jj = mm->j; 7545 const PetscInt *ii = mm->i; 7546 const PetscInt *cmap = 
cmapa[cp]; 7547 const PetscInt *rmap = rmapa[cp]; 7548 const PetscInt mr = mp[cp]->rmap->n; 7549 const PetscInt rs = C->rmap->rstart; 7550 const PetscInt re = C->rmap->rend; 7551 const PetscInt cs = C->cmap->rstart; 7552 7553 if (mptmp[cp]) continue; 7554 if (rmapt[cp] == 1) { /* consecutive rows */ 7555 /* fill coo_i */ 7556 for (i = 0; i < mr; i++) { 7557 const PetscInt gr = i + rs; 7558 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7559 } 7560 /* fill coo_j */ 7561 if (!cmapt[cp]) { /* type-0, already global */ 7562 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7563 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7564 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7565 } else { /* type-2, local to global for sparse columns */ 7566 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7567 } 7568 ncoo_d += mm->nz; 7569 } else if (rmapt[cp] == 2) { /* sparse rows */ 7570 for (i = 0; i < mr; i++) { 7571 const PetscInt *jj = mm->j + ii[i]; 7572 const PetscInt gr = rmap[i]; 7573 const PetscInt nz = ii[i + 1] - ii[i]; 7574 if (gr >= rs && gr < re) { /* local rows */ 7575 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7576 if (!cmapt[cp]) { /* type-0, already global */ 7577 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7578 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7579 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7580 } else { /* type-2, local to global for sparse columns */ 7581 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7582 } 7583 ncoo_d += nz; 7584 } 7585 } 7586 } 7587 } 7588 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7589 PetscCall(ISDestroy(&glob)); 7590 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7591 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7592 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7593 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7594 7595 /* preallocate with COO data */ 7596 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7597 PetscCall(PetscFree2(coo_i, coo_j)); 7598 PetscFunctionReturn(PETSC_SUCCESS); 7599 } 7600 7601 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7602 { 7603 Mat_Product *product = mat->product; 7604 #if defined(PETSC_HAVE_DEVICE) 7605 PetscBool match = PETSC_FALSE; 7606 PetscBool usecpu = PETSC_FALSE; 7607 #else 7608 PetscBool match = PETSC_TRUE; 7609 #endif 7610 7611 PetscFunctionBegin; 7612 MatCheckProduct(mat, 1); 7613 #if defined(PETSC_HAVE_DEVICE) 7614 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7615 if (match) { /* we can always fallback to the CPU if requested */ 7616 switch (product->type) { 7617 case MATPRODUCT_AB: 7618 if (product->api_user) { 7619 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7620 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7621 PetscOptionsEnd(); 7622 } else { 7623 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7624 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7625 PetscOptionsEnd(); 7626 } 7627 break; 7628 case MATPRODUCT_AtB: 7629 if (product->api_user) { 7630 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7631 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7632 PetscOptionsEnd(); 7633 } else { 7634 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7635 
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    /* the backend supports these three product types */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)

  Note: assumes the row's column indices are sorted ascending (standard for AIJ rows),
  so duplicate block ids are adjacent and collapse via the single comparison below.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j; /* cnt = -1 so an empty row yields *n = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      /* emit a new block id only when it differs from the last one recorded */
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed - the block indices (must be large enough to contain the indices)

  w0/w1/w2 are caller-provided workspaces; the result pointer returned in *collapsed
  aliases one of them (whichever ended up holding the final merge).
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* seed with the first row of the block, then merge in the remaining bs-1 rows */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* ping-pong the buffers: the merge result becomes the next "previous" */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

  Input Parameter:
  . Amat - matrix
  - symmetrize - make the result symmetric
  + scale - scale with diagonal

  Output Parameter:
  .
a_Gmat - output scalar graph >= 0 7731 7732 */ 7733 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7734 { 7735 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7736 MPI_Comm comm; 7737 Mat Gmat; 7738 PetscBool ismpiaij, isseqaij; 7739 Mat a, b, c; 7740 MatType jtype; 7741 7742 PetscFunctionBegin; 7743 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7744 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7745 PetscCall(MatGetSize(Amat, &MM, &NN)); 7746 PetscCall(MatGetBlockSize(Amat, &bs)); 7747 nloc = (Iend - Istart) / bs; 7748 7749 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7750 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7751 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7752 7753 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7754 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7755 implementation */ 7756 if (bs > 1) { 7757 PetscCall(MatGetType(Amat, &jtype)); 7758 PetscCall(MatCreate(comm, &Gmat)); 7759 PetscCall(MatSetType(Gmat, jtype)); 7760 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7761 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7762 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7763 PetscInt *d_nnz, *o_nnz; 7764 MatScalar *aa, val, *AA; 7765 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7766 if (isseqaij) { 7767 a = Amat; 7768 b = NULL; 7769 } else { 7770 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7771 a = d->A; 7772 b = d->B; 7773 } 7774 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7775 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7776 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7777 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7778 const PetscInt *cols1, *cols2; 7779 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7780 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7781 nnz[brow / bs] = nc2 / bs; 7782 if (nc2 % bs) ok = 0; 7783 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7784 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7785 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7786 if (nc1 != nc2) ok = 0; 7787 else { 7788 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7789 if (cols1[jj] != cols2[jj]) ok = 0; 7790 if (cols1[jj] % bs != jj % bs) ok = 0; 7791 } 7792 } 7793 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7794 } 7795 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7796 if (!ok) { 7797 PetscCall(PetscFree2(d_nnz, o_nnz)); 7798 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7799 goto old_bs; 7800 } 7801 } 7802 } 7803 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7804 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7805 PetscCall(PetscFree2(d_nnz, o_nnz)); 7806 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7807 // diag 7808 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7809 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7810 ai = aseq->i; 7811 n = ai[brow + 1] - ai[brow]; 7812 aj = aseq->j + ai[brow]; 7813 for (int k = 0; k < n; k += bs) { // block columns 7814 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7815 val = 0; 7816 if (index_size == 0) { 7817 for (int ii = 0; ii < bs; ii++) { // rows in block 7818 aa = aseq->a + ai[brow + ii] + k; 7819 for (int jj = 0; jj < bs; jj++) { // columns in block 7820 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7821 } 7822 } 7823 } else { // use (index,index) value if provided 7824 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7825 int ii = index[iii]; 7826 aa = aseq->a + ai[brow + ii] + k; 7827 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7828 int jj = index[jjj]; 7829 val = PetscAbs(PetscRealPart(aa[jj])); 7830 } 7831 } 7832 } 7833 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7834 AA[k / bs] = val; 7835 } 7836 grow = Istart / bs + brow / bs; 7837 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7838 } 7839 // off-diag 7840 if (ismpiaij) { 7841 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7842 const PetscScalar *vals; 7843 const PetscInt *cols, *garray = aij->garray; 7844 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7845 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7846 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7847 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7848 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7849 AA[k / bs] = 0; 7850 AJ[cidx] = garray[cols[k]] / bs; 7851 } 7852 nc = ncols / bs; 7853 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7854 if (index_size == 0) { 7855 for (int ii = 0; ii < bs; ii++) { // rows in block 7856 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7857 for (int k = 0; k < ncols; k += bs) { 7858 for (int jj = 0; jj < bs; jj++) { // cols in block 7859 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7860 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7861 } 7862 } 7863 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7864 } 7865 } else { // use (index,index) value if provided 7866 for (int iii = 0; iii < index_size; iii++) { // rows in block 7867 int ii = index[iii]; 7868 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7869 for (int k = 0; k < ncols; k += bs) { 7870 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7871 int jj = index[jjj]; 7872 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7873 } 7874 } 7875 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7876 } 7877 } 7878 grow = Istart / bs + brow / bs; 7879 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7880 } 7881 } 7882 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7883 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7884 PetscCall(PetscFree2(AA, AJ)); 7885 } else { 7886 const PetscScalar *vals; 7887 const PetscInt *idx; 7888 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7889 old_bs: 7890 /* 7891 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7892 */ 7893 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7894 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7895 if (isseqaij) { 7896 PetscInt max_d_nnz; 7897 /* 7898 Determine exact preallocation count for (sequential) scalar matrix 7899 */ 7900 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7901 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7902 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7903 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7904 PetscCall(PetscFree3(w0, w1, w2)); 7905 } else if (ismpiaij) { 7906 Mat Daij, Oaij; 7907 const PetscInt *garray; 7908 PetscInt max_d_nnz; 7909 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7910 /* 7911 Determine exact preallocation count for diagonal block portion of scalar matrix 7912 */ 7913 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7914 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7915 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7916 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7917 PetscCall(PetscFree3(w0, w1, w2)); 7918 /* 7919 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7920 */ 7921 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7922 o_nnz[jj] = 0; 7923 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7924 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7925 o_nnz[jj] += ncols; 7926 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7927 } 7928 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7929 } 7930 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7931 /* get scalar copy (norms) of matrix */ 7932 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7933 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7934 PetscCall(PetscFree2(d_nnz, o_nnz)); 7935 for (Ii = Istart; Ii < Iend; Ii++) { 7936 PetscInt dest_row = Ii / bs; 7937 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7938 for (jj = 0; jj < ncols; jj++) { 7939 PetscInt dest_col = idx[jj] / bs; 7940 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7941 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7942 } 7943 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7944 } 7945 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7946 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7947 } 7948 } else { 7949 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7950 else { 7951 Gmat = Amat; 7952 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7953 } 7954 if (isseqaij) { 7955 a = Gmat; 7956 b = NULL; 7957 } else { 7958 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7959 a = d->A; 7960 b = d->B; 7961 } 7962 if (filter >= 0 || scale) { 7963 /* take absolute value of each entry */ 7964 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7965 MatInfo info; 7966 PetscScalar *avals; 7967 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7968 PetscCall(MatSeqAIJGetArray(c, &avals)); 7969 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 7970 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7971 } 7972 } 7973 } 7974 if (symmetrize) { 7975 PetscBool isset, issym; 7976 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7977 if (!isset || !issym) { 7978 Mat matTrans; 7979 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7980 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7981 PetscCall(MatDestroy(&matTrans)); 7982 } 7983 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7984 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7985 if (scale) { 7986 /* scale c for all diagonal values = 1 or -1 */ 7987 Vec diag; 7988 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7989 PetscCall(MatGetDiagonal(Gmat, diag)); 7990 PetscCall(VecReciprocal(diag)); 7991 PetscCall(VecSqrtAbs(diag)); 7992 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7993 PetscCall(VecDestroy(&diag)); 7994 } 7995 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7996 7997 if (filter >= 0) { 7998 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 7999 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8000 } 8001 *a_Gmat = Gmat; 8002 PetscFunctionReturn(PETSC_SUCCESS); 8003 } 8004 8005 /* 8006 Special version for direct calls from Fortran 8007 */ 8008 #include <petsc/private/fortranimpl.h> 8009 8010 /* Change these macros so can be used in void function */ 8011 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8012 #undef PetscCall 8013 #define PetscCall(...) \ 8014 do { \ 8015 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8016 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8017 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8018 return; \ 8019 } \ 8020 } while (0) 8021 8022 #undef SETERRQ 8023 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran linker name mangling convention of the target compiler */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran-callable version of MatSetValues() for MPIAIJ matrices.

  All scalar arguments arrive as pointers (Fortran pass-by-reference); errors are reported
  through the trailing *_ierr argument rather than a return value, which is why PetscCall()
  and SETERRQ() were redefined above to assign *_ierr and return from this void function.

  Input Parameters:
.   mmat  - the MPIAIJ matrix
.   mm/im - number of rows and their global indices
.   mn/in - number of columns and their global indices
.   v     - the values, laid out row- or column-major according to aij->roworiented
.   maddv - INSERT_VALUES or ADD_VALUES

  Output Parameter:
.   _ierr - error code (0 on success)
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* The first call fixes the insert mode; mixing INSERT_VALUES and ADD_VALUES before assembly is an error */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro: MatSetValues_SeqAIJ_A_Private()/_B_Private()
       reference these exact names (rp1/ap1/..., rp2/ap2/..., bm, am, nonew, inserted),
       so none of them may be renamed or removed */
    Mat         A     = aij->A;                         /* diagonal (local-column) block */
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;                         /* off-diagonal (off-process column) block */
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed by the _Private() insertion macros (binary-search bounds, row caches) */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: prime the search state for both the diagonal (1) and off-diagonal (2) blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major or column-major depending on the matrix's roworiented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* off-diagonal zeros may be dropped when adding; diagonal entries are always kept */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column owned locally: goes into the diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are silently ignored */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-process column: goes into the off-diagonal block B, using the global-to-compressed colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              /* colmap stores col+1 so that 0 can mean "absent"; undo the shift here */
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not in the current nonzero pattern and new nonzeros are allowed:
                   disassemble back to global numbering so the entry can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private(),
                   since MatDisAssemble_MPIAIJ() replaced aij->B */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B still uses global column numbering */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ