#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
  MatDestroy_MPIAIJ - Destroys an MPIAIJ matrix.

  Frees the diagonal block (aij->A), the off-diagonal block (aij->B), the
  global-to-local column map, the communication objects (lvec, Mvctx) and the
  scratch row buffers, then detaches every composed object/function so the
  base Mat header is left clean.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is a hash map or a dense integer array depending on PETSC_USE_CTABLE; see MatCreateColmap_MPIAIJ_Private() */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  /* detach every method composed onto the object at creation/conversion time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is already cleared earlier in this function; this repeated call is redundant but harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
  MatGetRowIJ_MPIAIJ - Returns compressed row storage (ia,ja) arrays describing the matrix
  by first gathering the parallel matrix into one sequential matrix B.

  B is composed onto A under the key "MatGetRowIJ_MPIAIJ" (composition keeps a reference),
  so MatRestoreRowIJ_MPIAIJ() can retrieve the same B; the MatDestroy() here only drops
  this function's own reference.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatRestoreRowIJ_MPIAIJ - Returns the (ia,ja) arrays obtained with MatGetRowIJ_MPIAIJ()
  and removes the composed sequential matrix, which destroys it (last reference).
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.
 -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
  MatBindToCPU_MPIAIJ - Binds the matrix (and all of its pieces) to the CPU when flg is PETSC_TRUE.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  /* the flag is only recorded for builds with GPU support; otherwise it would have no effect */
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetBlockSizes_MPIAIJ - Propagates the row/column block sizes to the diagonal block;
  the off-diagonal block always gets a column block size of 1.
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindNonzeroRows_MPIAIJ - Builds an IS (global row numbers) of the locally owned rows
  that contain at least one stored nonzero value.

  Leaves *keptrows NULL when no process owns an all-zero row; the MPI_Allreduce makes
  that decision collective, so every rank takes the same branch.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of locally owned rows whose stored values are all zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no zero rows anywhere: keep *keptrows NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the rows that do have a nonzero */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalSet_MPIAIJ - Sets (or adds) the vector D on the diagonal of Y.

  When the row and column layouts are congruent and Y is assembled, the entire diagonal
  lives in the diagonal block, so the operation is purely local; otherwise fall back to
  the generic implementation.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindZeroDiagonals_MPIAIJ - Builds an IS (global numbering) of the locally owned rows
  whose diagonal entry is zero or missing; the search is local to the diagonal block.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ
*)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  /* locate zero diagonals in the local diagonal block, then shift the row numbers to global */
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction over the whole parallel matrix.

  type is NORM_1, NORM_2, NORM_INFINITY, or one of the REDUCTION_SUM/MEAN_REAL/IMAGINARYPART values;
  reductions[] must hold n (global number of columns) entries on every process.
  Each rank accumulates its contribution in work[] indexed by global column (diagonal-block
  columns offset by cmap->rstart, off-diagonal columns translated through garray), then the
  ranks combine with MPI max (infinity norm) or sum (all other types).
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs look like no-ops; presumably they force any
     device-side values to be current on the host before a_aij->a / b_aij->a are read
     directly below — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; square root is taken after the reduction */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-rank contributions: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* mean is taken over the global number of rows m */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindOffBlockDiagonalEntries_MPIAIJ - Builds an IS (global numbering) of the locally owned
  rows that have entries outside the block diagonal: rows flagged by the diagonal block plus
  every row with an off-process entry (nonzero row of B).
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate the two index lists; duplicates are removed below */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  /* sort, drop duplicates, and shift the (local) row numbers to global */
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* entries are stored shifted by +1 so a failed lookup (0) is distinguishable from column 0 */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add one value at (row,col) of the diagonal block inside MatSetValues_MPIAIJ():
   narrows the search window within the (sorted) row, updates in place when the location
   exists, otherwise reallocates and shifts the row to make room. Relies on the local
   variables rp1/ap1/low1/high1/... set up by the caller. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B
   (variables rp2/ap2/low2/high2/...). Note the zero-value shortcut here has no
   row != col test: off-diagonal entries are never on the matrix diagonal. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  MatSetValuesRow_MPIAIJ - Overwrites one locally owned row with the values v[], which must be
  ordered by global column: entries of B left of the diagonal block, then the whole A row,
  then the remaining entries of B.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values into the matrix.

  Locally owned rows go straight into the diagonal (A) or off-diagonal (B) sequential blocks
  through the macros above; rows owned by other processes are stashed and exchanged during
  MatAssemblyBegin/End. Off-diagonal columns are translated to B's local numbering through
  colmap; a brand-new off-diagonal column forces MatDisAssemble_MPIAIJ().
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,
PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: cache this row's window into A and B for the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a; /* NOTE(review): redundant repeat of the assignment above; harmless */
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal location but B's nonzero pattern is frozen */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column numbers */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* row owned by another process: stash the values for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write position into aj, and per-row count */
  PetscInt    offd_so_far = 0, onz; /* running write position into bj, and per-row count */

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* store local column number */
        dnz++;
      } else { /* off-diagonal entries: bj keeps the global column number at this stage */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetValues_MPIAIJ - Retrieves an m-by-n block of values; only locally owned rows may be
  requested (off-process rows raise PETSC_ERR_SUP). Off-diagonal columns are translated to
  B's local numbering through colmap; locations not stored in B are returned as 0.0.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column lies in the diagonal block */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* the garray cross-check guards against a colmap entry that no longer matches the
           requested global column; such locations are reported as 0.0 */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAssemblyBegin_MPIAIJ - Starts communicating the stashed values destined for rows
  owned by other processes; a no-op when stashing is disabled.
*/
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash into local rows via
  MatSetValues_MPIAIJ(), assembles the A and B blocks, handles collective disassembly
  when any rank disassembled, and (on final assembly) builds the multiply machinery
  and updates the collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain every incoming stash message, inserting runs of same-row values in one call */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroEntries_MPIAIJ - Zeroes all stored values (nonzero structure is retained) by
  zeroing the diagonal and off-diagonal blocks.
*/
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const
PetscInt rows[], PetscScalar diag, Vec x, Vec b) 845 { 846 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 847 PetscInt *lrows; 848 PetscInt r, len; 849 PetscBool cong; 850 851 PetscFunctionBegin; 852 /* get locally owned rows */ 853 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 854 PetscCall(MatHasCongruentLayouts(A, &cong)); 855 /* fix right hand side if needed */ 856 if (x && b) { 857 const PetscScalar *xx; 858 PetscScalar *bb; 859 860 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 861 PetscCall(VecGetArrayRead(x, &xx)); 862 PetscCall(VecGetArray(b, &bb)); 863 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 864 PetscCall(VecRestoreArrayRead(x, &xx)); 865 PetscCall(VecRestoreArray(b, &bb)); 866 } 867 868 if (diag != 0.0 && cong) { 869 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 870 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 871 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 872 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 873 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 874 PetscInt nnwA, nnwB; 875 PetscBool nnzA, nnzB; 876 877 nnwA = aijA->nonew; 878 nnwB = aijB->nonew; 879 nnzA = aijA->keepnonzeropattern; 880 nnzB = aijB->keepnonzeropattern; 881 if (!nnzA) { 882 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 883 aijA->nonew = 0; 884 } 885 if (!nnzB) { 886 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 887 aijB->nonew = 0; 888 } 889 /* Must zero here before the next loop */ 890 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 891 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + 
A->rmap->rstart; 894 if (row >= A->cmap->N) continue; 895 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 896 } 897 aijA->nonew = nnwA; 898 aijB->nonew = nnwB; 899 } else { 900 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 901 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 902 } 903 PetscCall(PetscFree(lrows)); 904 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 905 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 906 907 /* only change matrix nonzero state if pattern was allowed to be changed */ 908 if (!((Mat_SeqAIJ *)(mat->A->data))->keepnonzeropattern || !((Mat_SeqAIJ *)(mat->A->data))->nonew) { 909 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 910 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 911 } 912 PetscFunctionReturn(PETSC_SUCCESS); 913 } 914 915 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 916 { 917 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 918 PetscMPIInt n = A->rmap->n; 919 PetscInt i, j, r, m, len = 0; 920 PetscInt *lrows, *owners = A->rmap->range; 921 PetscMPIInt p = 0; 922 PetscSFNode *rrows; 923 PetscSF sf; 924 const PetscScalar *xx; 925 PetscScalar *bb, *mask, *aij_a; 926 Vec xmask, lmask; 927 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 928 const PetscInt *aj, *ii, *ridx; 929 PetscScalar *aa; 930 931 PetscFunctionBegin; 932 /* Create SF where leaves are input rows and roots are owned rows */ 933 PetscCall(PetscMalloc1(n, &lrows)); 934 for (r = 0; r < n; ++r) lrows[r] = -1; 935 PetscCall(PetscMalloc1(N, &rrows)); 936 for (r = 0; r < N; ++r) { 937 const PetscInt idx = rows[r]; 938 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 939 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns 
this row too */ 940 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 941 } 942 rrows[r].rank = p; 943 rrows[r].index = rows[r] - owners[p]; 944 } 945 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 946 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 947 /* Collect flags for rows to be zeroed */ 948 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 950 PetscCall(PetscSFDestroy(&sf)); 951 /* Compress and put in row numbers */ 952 for (r = 0; r < n; ++r) 953 if (lrows[r] >= 0) lrows[len++] = r; 954 /* zero diagonal part of matrix */ 955 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 956 /* handle off-diagonal part of matrix */ 957 PetscCall(MatCreateVecs(A, &xmask, NULL)); 958 PetscCall(VecDuplicate(l->lvec, &lmask)); 959 PetscCall(VecGetArray(xmask, &bb)); 960 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 961 PetscCall(VecRestoreArray(xmask, &bb)); 962 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 964 PetscCall(VecDestroy(&xmask)); 965 if (x && b) { /* this code is buggy when the row and column layout don't match */ 966 PetscBool cong; 967 968 PetscCall(MatHasCongruentLayouts(A, &cong)); 969 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 970 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 972 PetscCall(VecGetArrayRead(l->lvec, &xx)); 973 PetscCall(VecGetArray(b, &bb)); 974 } 975 PetscCall(VecGetArray(lmask, &mask)); 976 /* remove zeroed rows of off-diagonal matrix */ 977 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 978 ii = aij->i; 979 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 980 /* loop over all elements of off process part of matrix zeroing removed columns*/ 981 if (aij->compressedrow.use) { 982 m = aij->compressedrow.nrows; 983 ii = aij->compressedrow.i; 984 ridx = aij->compressedrow.rindex; 985 for (i = 0; i < m; i++) { 986 n = ii[i + 1] - ii[i]; 987 aj = aij->j + ii[i]; 988 aa = aij_a + ii[i]; 989 990 for (j = 0; j < n; j++) { 991 if (PetscAbsScalar(mask[*aj])) { 992 if (b) bb[*ridx] -= *aa * xx[*aj]; 993 *aa = 0.0; 994 } 995 aa++; 996 aj++; 997 } 998 ridx++; 999 } 1000 } else { /* do not use compressed row format */ 1001 m = l->B->rmap->n; 1002 for (i = 0; i < m; i++) { 1003 n = ii[i + 1] - ii[i]; 1004 aj = aij->j + ii[i]; 1005 aa = aij_a + ii[i]; 1006 for (j = 0; j < n; j++) { 1007 if (PetscAbsScalar(mask[*aj])) { 1008 if (b) bb[i] -= *aa * xx[*aj]; 1009 *aa = 0.0; 1010 } 1011 aa++; 1012 aj++; 1013 } 1014 } 1015 } 1016 if (x && b) { 1017 PetscCall(VecRestoreArray(b, &bb)); 1018 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1019 } 1020 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1021 PetscCall(VecRestoreArray(lmask, &mask)); 1022 PetscCall(VecDestroy(&lmask)); 1023 PetscCall(PetscFree(lrows)); 1024 1025 /* only change matrix nonzero state if pattern was allowed to be changed */ 1026 if (!((Mat_SeqAIJ *)(l->A->data))->nonew) { 1027 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1028 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1029 } 1030 PetscFunctionReturn(PETSC_SUCCESS); 1031 } 1032 1033 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1034 { 1035 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1036 PetscInt nt; 1037 VecScatter Mvctx = a->Mvctx; 1038 1039 PetscFunctionBegin; 1040 PetscCall(VecGetLocalSize(xx, &nt)); 1041 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") 
and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1042 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1043 PetscUseTypeMethod(a->A, mult, xx, yy); 1044 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1045 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1046 PetscFunctionReturn(PETSC_SUCCESS); 1047 } 1048 1049 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1050 { 1051 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1052 1053 PetscFunctionBegin; 1054 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1061 VecScatter Mvctx = a->Mvctx; 1062 1063 PetscFunctionBegin; 1064 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1065 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1066 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1067 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1068 PetscFunctionReturn(PETSC_SUCCESS); 1069 } 1070 1071 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1074 1075 PetscFunctionBegin; 1076 /* do nondiagonal part */ 1077 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1078 /* do local part */ 1079 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1080 /* add partial results together */ 1081 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1083 PetscFunctionReturn(PETSC_SUCCESS); 1084 } 1085 1086 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1087 { 1088 MPI_Comm comm; 1089 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1090 Mat Adia = Aij->A, Bdia = 
Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1091 IS Me, Notme; 1092 PetscInt M, N, first, last, *notme, i; 1093 PetscBool lf; 1094 PetscMPIInt size; 1095 1096 PetscFunctionBegin; 1097 /* Easy test: symmetric diagonal block */ 1098 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1099 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1100 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1101 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1102 PetscCallMPI(MPI_Comm_size(comm, &size)); 1103 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1104 1105 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1106 PetscCall(MatGetSize(Amat, &M, &N)); 1107 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1108 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1109 for (i = 0; i < first; i++) notme[i] = i; 1110 for (i = last; i < M; i++) notme[i - last + first] = i; 1111 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1112 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1113 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1114 Aoff = Aoffs[0]; 1115 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1116 Boff = Boffs[0]; 1117 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1118 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1119 PetscCall(MatDestroyMatrices(1, &Boffs)); 1120 PetscCall(ISDestroy(&Me)); 1121 PetscCall(ISDestroy(&Notme)); 1122 PetscCall(PetscFree(notme)); 1123 PetscFunctionReturn(PETSC_SUCCESS); 1124 } 1125 1126 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1127 { 1128 PetscFunctionBegin; 1129 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1130 PetscFunctionReturn(PETSC_SUCCESS); 1131 } 1132 1133 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1134 { 1135 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1136 1137 
PetscFunctionBegin; 1138 /* do nondiagonal part */ 1139 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1140 /* do local part */ 1141 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1142 /* add partial results together */ 1143 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1144 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1145 PetscFunctionReturn(PETSC_SUCCESS); 1146 } 1147 1148 /* 1149 This only works correctly for square matrices where the subblock A->A is the 1150 diagonal block 1151 */ 1152 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1153 { 1154 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1155 1156 PetscFunctionBegin; 1157 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1158 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1159 PetscCall(MatGetDiagonal(a->A, v)); 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 } 1162 1163 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1164 { 1165 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1166 1167 PetscFunctionBegin; 1168 PetscCall(MatScale(a->A, aa)); 1169 PetscCall(MatScale(a->B, aa)); 1170 PetscFunctionReturn(PETSC_SUCCESS); 1171 } 1172 1173 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1174 { 1175 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1176 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1177 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1178 const PetscInt *garray = aij->garray; 1179 const PetscScalar *aa, *ba; 1180 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1181 PetscInt64 nz, hnz; 1182 PetscInt *rowlens; 1183 PetscInt *colidxs; 1184 PetscScalar *matvals; 1185 PetscMPIInt rank; 1186 1187 PetscFunctionBegin; 1188 PetscCall(PetscViewerSetUp(viewer)); 1189 1190 M = mat->rmap->N; 1191 
N = mat->cmap->N; 1192 m = mat->rmap->n; 1193 rs = mat->rmap->rstart; 1194 cs = mat->cmap->rstart; 1195 nz = A->nz + B->nz; 1196 1197 /* write matrix header */ 1198 header[0] = MAT_FILE_CLASSID; 1199 header[1] = M; 1200 header[2] = N; 1201 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1202 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1203 if (rank == 0) { 1204 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1205 else header[3] = (PetscInt)hnz; 1206 } 1207 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1208 1209 /* fill in and store row lengths */ 1210 PetscCall(PetscMalloc1(m, &rowlens)); 1211 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1212 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1213 PetscCall(PetscFree(rowlens)); 1214 1215 /* fill in and store column indices */ 1216 PetscCall(PetscMalloc1(nz, &colidxs)); 1217 for (cnt = 0, i = 0; i < m; i++) { 1218 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1219 if (garray[B->j[jb]] > cs) break; 1220 colidxs[cnt++] = garray[B->j[jb]]; 1221 } 1222 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1223 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1224 } 1225 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1226 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1227 PetscCall(PetscFree(colidxs)); 1228 1229 /* fill in and store nonzero values */ 1230 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1231 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1232 PetscCall(PetscMalloc1(nz, &matvals)); 1233 for (cnt = 0, i = 0; i < m; i++) { 1234 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1235 if (garray[B->j[jb]] > cs) break; 1236 matvals[cnt++] = ba[jb]; 1237 } 1238 for (ja = 
A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1239 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1240 } 1241 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1242 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1243 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1244 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1245 PetscCall(PetscFree(matvals)); 1246 1247 /* write block size option to the viewer's .info file */ 1248 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1249 PetscFunctionReturn(PETSC_SUCCESS); 1250 } 1251 1252 #include <petscdraw.h> 1253 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1256 PetscMPIInt rank = aij->rank, size = aij->size; 1257 PetscBool isdraw, iascii, isbinary; 1258 PetscViewer sviewer; 1259 PetscViewerFormat format; 1260 1261 PetscFunctionBegin; 1262 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1263 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1264 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1265 if (iascii) { 1266 PetscCall(PetscViewerGetFormat(viewer, &format)); 1267 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1268 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1269 PetscCall(PetscMalloc1(size, &nz)); 1270 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1271 for (i = 0; i < (PetscInt)size; i++) { 1272 nmax = PetscMax(nmax, nz[i]); 1273 nmin = PetscMin(nmin, nz[i]); 1274 navg += nz[i]; 1275 } 1276 PetscCall(PetscFree(nz)); 1277 navg = navg / size; 1278 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance 
- Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 PetscCall(PetscViewerGetFormat(viewer, &format)); 1282 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1283 MatInfo info; 1284 PetscInt *inodes = NULL; 1285 1286 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1287 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1288 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1289 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1290 if (!inodes) { 1291 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1292 (double)info.memory)); 1293 } else { 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1295 (double)info.memory)); 1296 } 1297 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1298 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1299 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1301 PetscCall(PetscViewerFlush(viewer)); 1302 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1303 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1304 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1305 PetscFunctionReturn(PETSC_SUCCESS); 1306 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1307 PetscInt inodecount, inodelimit, *inodes; 1308 
PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1309 if (inodes) { 1310 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1311 } else { 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1313 } 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1316 PetscFunctionReturn(PETSC_SUCCESS); 1317 } 1318 } else if (isbinary) { 1319 if (size == 1) { 1320 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1321 PetscCall(MatView(aij->A, viewer)); 1322 } else { 1323 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1324 } 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } else if (iascii && size == 1) { 1327 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1328 PetscCall(MatView(aij->A, viewer)); 1329 PetscFunctionReturn(PETSC_SUCCESS); 1330 } else if (isdraw) { 1331 PetscDraw draw; 1332 PetscBool isnull; 1333 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1334 PetscCall(PetscDrawIsNull(draw, &isnull)); 1335 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1336 } 1337 1338 { /* assemble the entire matrix onto first processor */ 1339 Mat A = NULL, Av; 1340 IS isrow, iscol; 1341 1342 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1344 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1345 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1346 /* The commented code uses MatCreateSubMatrices instead */ 1347 /* 1348 Mat *AA, A = NULL, Av; 1349 IS isrow,iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1353 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1354 if (rank == 0) { 1355 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1356 A = AA[0]; 1357 Av = AA[0]; 1358 } 1359 PetscCall(MatDestroySubMatrices(1,&AA)); 1360 */ 1361 PetscCall(ISDestroy(&iscol)); 1362 PetscCall(ISDestroy(&isrow)); 1363 /* 1364 Everyone has to call to draw the matrix since the graphics waits are 1365 synchronized across all processors that share the PetscDraw object 1366 */ 1367 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1368 if (rank == 0) { 1369 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1370 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1371 } 1372 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1373 PetscCall(MatDestroy(&A)); 1374 } 1375 PetscFunctionReturn(PETSC_SUCCESS); 1376 } 1377 1378 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1379 { 1380 PetscBool iascii, isdraw, issocket, isbinary; 1381 1382 PetscFunctionBegin; 1383 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1384 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1385 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1386 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1387 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1388 PetscFunctionReturn(PETSC_SUCCESS); 1389 } 1390 1391 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1392 { 1393 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1394 Vec bb1 = NULL; 1395 PetscBool hasop; 1396 1397 
PetscFunctionBegin; 1398 if (flag == SOR_APPLY_UPPER) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 PetscFunctionReturn(PETSC_SUCCESS); 1401 } 1402 1403 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1404 1405 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1406 if (flag & SOR_ZERO_INITIAL_GUESS) { 1407 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1408 its--; 1409 } 1410 1411 while (its--) { 1412 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1413 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1414 1415 /* update rhs: bb1 = bb - B*x */ 1416 PetscCall(VecScale(mat->lvec, -1.0)); 1417 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1418 1419 /* local sweep */ 1420 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1421 } 1422 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1423 if (flag & SOR_ZERO_INITIAL_GUESS) { 1424 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1425 its--; 1426 } 1427 while (its--) { 1428 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1429 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1430 1431 /* update rhs: bb1 = bb - B*x */ 1432 PetscCall(VecScale(mat->lvec, -1.0)); 1433 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1434 1435 /* local sweep */ 1436 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1437 } 1438 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1439 if (flag & SOR_ZERO_INITIAL_GUESS) { 1440 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1441 its--; 1442 } 1443 while (its--) { 1444 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, 
SCATTER_FORWARD)); 1445 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1446 1447 /* update rhs: bb1 = bb - B*x */ 1448 PetscCall(VecScale(mat->lvec, -1.0)); 1449 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1450 1451 /* local sweep */ 1452 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1453 } 1454 } else if (flag & SOR_EISENSTAT) { 1455 Vec xx1; 1456 1457 PetscCall(VecDuplicate(bb, &xx1)); 1458 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1459 1460 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1461 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1462 if (!mat->diag) { 1463 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1464 PetscCall(MatGetDiagonal(matin, mat->diag)); 1465 } 1466 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1467 if (hasop) { 1468 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1469 } else { 1470 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1471 } 1472 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1473 1474 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1475 1476 /* local sweep */ 1477 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1478 PetscCall(VecAXPY(xx, 1.0, xx1)); 1479 PetscCall(VecDestroy(&xx1)); 1480 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1481 1482 PetscCall(VecDestroy(&bb1)); 1483 1484 matin->factorerrortype = mat->A->factorerrortype; 1485 PetscFunctionReturn(PETSC_SUCCESS); 1486 } 1487 1488 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1489 { 1490 Mat aA, aB, Aperm; 1491 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1492 PetscScalar *aa, 
*ba; 1493 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1494 PetscSF rowsf, sf; 1495 IS parcolp = NULL; 1496 PetscBool done; 1497 1498 PetscFunctionBegin; 1499 PetscCall(MatGetLocalSize(A, &m, &n)); 1500 PetscCall(ISGetIndices(rowp, &rwant)); 1501 PetscCall(ISGetIndices(colp, &cwant)); 1502 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1503 1504 /* Invert row permutation to find out where my rows should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1506 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1507 PetscCall(PetscSFSetFromOptions(rowsf)); 1508 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1511 1512 /* Invert column permutation to find out where my columns should go */ 1513 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1514 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1515 PetscCall(PetscSFSetFromOptions(sf)); 1516 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1517 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1518 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1519 PetscCall(PetscSFDestroy(&sf)); 1520 1521 PetscCall(ISRestoreIndices(rowp, &rwant)); 1522 PetscCall(ISRestoreIndices(colp, &cwant)); 1523 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1524 1525 /* Find out where my gcols should go */ 1526 PetscCall(MatGetSize(aB, NULL, &ng)); 1527 PetscCall(PetscMalloc1(ng, &gcdest)); 1528 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1529 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1530 PetscCall(PetscSFSetFromOptions(sf)); 1531 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, 
cdest, gcdest, MPI_REPLACE)); 1532 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1533 PetscCall(PetscSFDestroy(&sf)); 1534 1535 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1536 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1537 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1538 for (i = 0; i < m; i++) { 1539 PetscInt row = rdest[i]; 1540 PetscMPIInt rowner; 1541 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1542 for (j = ai[i]; j < ai[i + 1]; j++) { 1543 PetscInt col = cdest[aj[j]]; 1544 PetscMPIInt cowner; 1545 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1546 if (rowner == cowner) dnnz[i]++; 1547 else onnz[i]++; 1548 } 1549 for (j = bi[i]; j < bi[i + 1]; j++) { 1550 PetscInt col = gcdest[bj[j]]; 1551 PetscMPIInt cowner; 1552 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1553 if (rowner == cowner) dnnz[i]++; 1554 else onnz[i]++; 1555 } 1556 } 1557 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1558 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1559 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1560 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1561 PetscCall(PetscSFDestroy(&rowsf)); 1562 1563 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1564 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1565 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1566 for (i = 0; i < m; i++) { 1567 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1568 PetscInt j0, rowlen; 1569 rowlen = ai[i + 1] - ai[i]; 1570 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1571 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = 
cdest[aj[ai[i] + j]]; 1572 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1573 } 1574 rowlen = bi[i + 1] - bi[i]; 1575 for (j0 = j = 0; j < rowlen; j0 = j) { 1576 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1577 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1578 } 1579 } 1580 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1581 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1582 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1583 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1584 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1585 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1586 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1587 PetscCall(PetscFree3(work, rdest, cdest)); 1588 PetscCall(PetscFree(gcdest)); 1589 if (parcolp) PetscCall(ISDestroy(&colp)); 1590 *B = Aperm; 1591 PetscFunctionReturn(PETSC_SUCCESS); 1592 } 1593 1594 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1595 { 1596 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1597 1598 PetscFunctionBegin; 1599 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1600 if (ghosts) *ghosts = aij->garray; 1601 PetscFunctionReturn(PETSC_SUCCESS); 1602 } 1603 1604 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1607 Mat A = mat->A, B = mat->B; 1608 PetscLogDouble isend[5], irecv[5]; 1609 1610 PetscFunctionBegin; 1611 info->block_size = 1.0; 1612 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1613 1614 isend[0] = info->nz_used; 1615 isend[1] = info->nz_allocated; 1616 isend[2] = info->nz_unneeded; 1617 isend[3] = info->memory; 1618 isend[4] = info->mallocs; 1619 1620 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1621 1622 isend[0] += info->nz_used; 1623 isend[1] += 
info->nz_allocated; 1624 isend[2] += info->nz_unneeded; 1625 isend[3] += info->memory; 1626 isend[4] += info->mallocs; 1627 if (flag == MAT_LOCAL) { 1628 info->nz_used = isend[0]; 1629 info->nz_allocated = isend[1]; 1630 info->nz_unneeded = isend[2]; 1631 info->memory = isend[3]; 1632 info->mallocs = isend[4]; 1633 } else if (flag == MAT_GLOBAL_MAX) { 1634 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } else if (flag == MAT_GLOBAL_SUM) { 1642 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1643 1644 info->nz_used = irecv[0]; 1645 info->nz_allocated = irecv[1]; 1646 info->nz_unneeded = irecv[2]; 1647 info->memory = irecv[3]; 1648 info->mallocs = irecv[4]; 1649 } 1650 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1651 info->fill_ratio_needed = 0; 1652 info->factor_mallocs = 0; 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1657 { 1658 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1659 1660 PetscFunctionBegin; 1661 switch (op) { 1662 case MAT_NEW_NONZERO_LOCATIONS: 1663 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1664 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1665 case MAT_KEEP_NONZERO_PATTERN: 1666 case MAT_NEW_NONZERO_LOCATION_ERR: 1667 case MAT_USE_INODES: 1668 case MAT_IGNORE_ZERO_ENTRIES: 1669 case MAT_FORM_EXPLICIT_TRANSPOSE: 1670 MatCheckPreallocated(A, 1); 1671 PetscCall(MatSetOption(a->A, op, flg)); 1672 PetscCall(MatSetOption(a->B, op, flg)); 1673 break; 1674 case MAT_ROW_ORIENTED: 1675 MatCheckPreallocated(A, 1); 1676 a->roworiented = flg; 1677 1678 PetscCall(MatSetOption(a->A, op, flg)); 1679 PetscCall(MatSetOption(a->B, op, flg)); 1680 break; 1681 case MAT_FORCE_DIAGONAL_ENTRIES: 
1682 case MAT_SORTED_FULL: 1683 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1684 break; 1685 case MAT_IGNORE_OFF_PROC_ENTRIES: 1686 a->donotstash = flg; 1687 break; 1688 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1689 case MAT_SPD: 1690 case MAT_SYMMETRIC: 1691 case MAT_STRUCTURALLY_SYMMETRIC: 1692 case MAT_HERMITIAN: 1693 case MAT_SYMMETRY_ETERNAL: 1694 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1695 case MAT_SPD_ETERNAL: 1696 /* if the diagonal matrix is square it inherits some of the properties above */ 1697 break; 1698 case MAT_SUBMAT_SINGLEIS: 1699 A->submat_singleis = flg; 1700 break; 1701 case MAT_STRUCTURE_ONLY: 1702 /* The option is handled directly by MatSetOption() */ 1703 break; 1704 default: 1705 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1706 } 1707 PetscFunctionReturn(PETSC_SUCCESS); 1708 } 1709 1710 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1711 { 1712 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1713 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1714 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1715 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1716 PetscInt *cmap, *idx_p; 1717 1718 PetscFunctionBegin; 1719 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1720 mat->getrowactive = PETSC_TRUE; 1721 1722 if (!mat->rowvalues && (idx || v)) { 1723 /* 1724 allocate enough space to hold information from the longest row. 
1725 */ 1726 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1727 PetscInt max = 1, tmp; 1728 for (i = 0; i < matin->rmap->n; i++) { 1729 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1730 if (max < tmp) max = tmp; 1731 } 1732 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1733 } 1734 1735 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1736 lrow = row - rstart; 1737 1738 pvA = &vworkA; 1739 pcA = &cworkA; 1740 pvB = &vworkB; 1741 pcB = &cworkB; 1742 if (!v) { 1743 pvA = NULL; 1744 pvB = NULL; 1745 } 1746 if (!idx) { 1747 pcA = NULL; 1748 if (!v) pcB = NULL; 1749 } 1750 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1751 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1752 nztot = nzA + nzB; 1753 1754 cmap = mat->garray; 1755 if (v || idx) { 1756 if (nztot) { 1757 /* Sort by increasing column numbers, assuming A and B already sorted */ 1758 PetscInt imark = -1; 1759 if (v) { 1760 *v = v_p = mat->rowvalues; 1761 for (i = 0; i < nzB; i++) { 1762 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1763 else break; 1764 } 1765 imark = i; 1766 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1767 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1768 } 1769 if (idx) { 1770 *idx = idx_p = mat->rowindices; 1771 if (imark > -1) { 1772 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1773 } else { 1774 for (i = 0; i < nzB; i++) { 1775 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1776 else break; 1777 } 1778 imark = i; 1779 } 1780 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1781 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1782 } 1783 } else { 1784 if (idx) *idx = NULL; 1785 if (v) *v = NULL; 1786 } 1787 } 1788 *nz = nztot; 1789 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1790 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1795 { 1796 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1797 1798 PetscFunctionBegin; 1799 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1800 aij->getrowactive = PETSC_FALSE; 1801 PetscFunctionReturn(PETSC_SUCCESS); 1802 } 1803 1804 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1805 { 1806 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1807 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1808 PetscInt i, j, cstart = mat->cmap->rstart; 1809 PetscReal sum = 0.0; 1810 const MatScalar *v, *amata, *bmata; 1811 1812 PetscFunctionBegin; 1813 if (aij->size == 1) { 1814 PetscCall(MatNorm(aij->A, type, norm)); 1815 } else { 1816 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1817 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1818 if (type == NORM_FROBENIUS) { 1819 v = amata; 1820 for (i = 0; i < amat->nz; i++) { 1821 sum += PetscRealPart(PetscConj(*v) * (*v)); 1822 v++; 1823 } 1824 v = bmata; 1825 for (i = 0; i < bmat->nz; i++) { 1826 sum += PetscRealPart(PetscConj(*v) * (*v)); 1827 v++; 1828 } 1829 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1830 *norm = PetscSqrtReal(*norm); 1831 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1832 } else if (type == NORM_1) { /* max column norm */ 1833 PetscReal *tmp, *tmp2; 1834 PetscInt *jj, *garray = aij->garray; 1835 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1836 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1837 *norm = 0.0; 1838 v = amata; 1839 jj = amat->j; 1840 for (j = 0; j < amat->nz; j++) { 1841 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1842 v++; 1843 } 1844 v = bmata; 1845 jj = bmat->j; 1846 for (j = 0; j < bmat->nz; j++) { 1847 
tmp[garray[*jj++]] += PetscAbsScalar(*v); 1848 v++; 1849 } 1850 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1851 for (j = 0; j < mat->cmap->N; j++) { 1852 if (tmp2[j] > *norm) *norm = tmp2[j]; 1853 } 1854 PetscCall(PetscFree(tmp)); 1855 PetscCall(PetscFree(tmp2)); 1856 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1857 } else if (type == NORM_INFINITY) { /* max row norm */ 1858 PetscReal ntemp = 0.0; 1859 for (j = 0; j < aij->A->rmap->n; j++) { 1860 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1861 sum = 0.0; 1862 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1863 sum += PetscAbsScalar(*v); 1864 v++; 1865 } 1866 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1867 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1868 sum += PetscAbsScalar(*v); 1869 v++; 1870 } 1871 if (sum > ntemp) ntemp = sum; 1872 } 1873 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1874 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1875 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1876 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1878 } 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 1882 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1883 { 1884 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1885 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1886 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1887 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1888 Mat B, A_diag, *B_diag; 1889 const MatScalar *pbv, *bv; 1890 1891 PetscFunctionBegin; 1892 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1893 ma = A->rmap->n; 1894 na = 
A->cmap->n; 1895 mb = a->B->rmap->n; 1896 nb = a->B->cmap->n; 1897 ai = Aloc->i; 1898 aj = Aloc->j; 1899 bi = Bloc->i; 1900 bj = Bloc->j; 1901 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1902 PetscInt *d_nnz, *g_nnz, *o_nnz; 1903 PetscSFNode *oloc; 1904 PETSC_UNUSED PetscSF sf; 1905 1906 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1907 /* compute d_nnz for preallocation */ 1908 PetscCall(PetscArrayzero(d_nnz, na)); 1909 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1910 /* compute local off-diagonal contributions */ 1911 PetscCall(PetscArrayzero(g_nnz, nb)); 1912 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1913 /* map those to global */ 1914 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1915 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1916 PetscCall(PetscSFSetFromOptions(sf)); 1917 PetscCall(PetscArrayzero(o_nnz, na)); 1918 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1919 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFDestroy(&sf)); 1921 1922 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1923 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1924 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1925 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1926 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1927 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1928 } else { 1929 B = *matout; 1930 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1931 } 1932 1933 b = (Mat_MPIAIJ *)B->data; 1934 A_diag = a->A; 1935 B_diag = &b->A; 1936 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1937 A_diag_ncol = A_diag->cmap->N; 1938 B_diag_ilen = sub_B_diag->ilen; 1939 B_diag_i = sub_B_diag->i; 1940 1941 /* Set ilen for diagonal of B */ 1942 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1943 1944 /* 
Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1945 very quickly (=without using MatSetValues), because all writes are local. */ 1946 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1947 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1948 1949 /* copy over the B part */ 1950 PetscCall(PetscMalloc1(bi[mb], &cols)); 1951 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1952 pbv = bv; 1953 row = A->rmap->rstart; 1954 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1955 cols_tmp = cols; 1956 for (i = 0; i < mb; i++) { 1957 ncol = bi[i + 1] - bi[i]; 1958 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1959 row++; 1960 if (pbv) pbv += ncol; 1961 if (cols_tmp) cols_tmp += ncol; 1962 } 1963 PetscCall(PetscFree(cols)); 1964 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1965 1966 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1967 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1968 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1969 *matout = B; 1970 } else { 1971 PetscCall(MatHeaderMerge(A, &B)); 1972 } 1973 PetscFunctionReturn(PETSC_SUCCESS); 1974 } 1975 1976 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1977 { 1978 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1979 Mat a = aij->A, b = aij->B; 1980 PetscInt s1, s2, s3; 1981 1982 PetscFunctionBegin; 1983 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1984 if (rr) { 1985 PetscCall(VecGetLocalSize(rr, &s1)); 1986 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1987 /* Overlap communication with computation. 
*/
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling touches only locally owned rows, so it needs no communication */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clear the factored state of the matrix.

   Only the local diagonal block a->A is reset.  NOTE(review): a->B is not
   touched here -- presumably the off-diagonal block never carries a factored
   state; confirm against the factorization code paths. */
static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Test whether two MPIAIJ matrices are equal.

   Compares the diagonal blocks and the off-diagonal blocks pairwise on each
   process, then combines the per-process results with a logical AND over the
   communicator so that every rank returns the same global answer in flag. */
static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg)); /* short-circuit: skip the B parts when the A parts already differ */
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    /* same pattern and same implementation: copy block-wise */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.

   For each of the m rows this merges the column lists of X and Y -- compared
   in the global numbering obtained through the xltog/yltog maps -- and stores
   the size of the union in nnz[i].  Assumes each row's columns are sorted in
   increasing global order (the merge below relies on it).
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++; /* trailing entries present only in Y */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y for MPIAIJ.

   SAME_NONZERO_PATTERN:   apply MatAXPY block-wise to the diagonal and
                           off-diagonal parts.
   SUBSET_NONZERO_PATTERN: fall back to MatAXPY_Basic on the whole matrix.
   otherwise:              build a new matrix B preallocated for the union of
                           the two patterns, accumulate the sum into it, then
                           swap it into Y via MatHeaderMerge. */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    /* per-row union counts: the diagonal blocks share a column space, but the
       off-diagonal blocks use compressed columns, so those are compared
       through the garray local-to-global maps */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Complex-conjugate every entry of both blocks; a no-op in real builds. */
static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 2141 PetscFunctionBegin; 2142 PetscCall(MatRealPart(a->A)); 2143 PetscCall(MatRealPart(a->B)); 2144 PetscFunctionReturn(PETSC_SUCCESS); 2145 } 2146 2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 2151 PetscFunctionBegin; 2152 PetscCall(MatImaginaryPart(a->A)); 2153 PetscCall(MatImaginaryPart(a->B)); 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2158 { 2159 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2160 PetscInt i, *idxb = NULL, m = A->rmap->n; 2161 PetscScalar *va, *vv; 2162 Vec vB, vA; 2163 const PetscScalar *vb; 2164 2165 PetscFunctionBegin; 2166 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2167 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2168 2169 PetscCall(VecGetArrayWrite(vA, &va)); 2170 if (idx) { 2171 for (i = 0; i < m; i++) { 2172 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2173 } 2174 } 2175 2176 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2177 PetscCall(PetscMalloc1(m, &idxb)); 2178 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2179 2180 PetscCall(VecGetArrayWrite(v, &vv)); 2181 PetscCall(VecGetArrayRead(vB, &vb)); 2182 for (i = 0; i < m; i++) { 2183 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2184 vv[i] = vb[i]; 2185 if (idx) idx[i] = a->garray[idxb[i]]; 2186 } else { 2187 vv[i] = va[i]; 2188 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2189 } 2190 } 2191 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2192 PetscCall(VecRestoreArrayWrite(vA, &va)); 2193 PetscCall(VecRestoreArrayRead(vB, &vb)); 2194 PetscCall(PetscFree(idxb)); 2195 PetscCall(VecDestroy(&vA)); 2196 PetscCall(VecDestroy(&vB)); 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 
/* Compute, for each locally owned row, the sum of absolute values of its
   entries: the row sums of the diagonal and off-diagonal blocks are taken
   into work vectors, added, and copied into the caller's vector v. */
static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    m = A->rmap->n;
  Vec         vB, vA;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute, for each locally owned row, the entry of smallest absolute value,
   treating columns with no stored entry as implicit zeros; idx (if non-NULL)
   receives the global column index of the winner. */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* all columns are local: delegate to the diagonal block, writing straight
       into v's array through a wrapper vector */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: every row minimum is an implicit zero */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = 
offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Row-wise minimum of an MPIAIJ matrix: combines the row minima of the local
  diagonal block (mat->A) and off-diagonal block (mat->B), accounting for the
  implicit 0.0 entries of rows of B that are not completely dense.
  v receives the per-row minimum; idx (optional) the global column of that minimum,
  ties broken by the smaller global column index.
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps local B columns to global columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal block on this rank: delegate entirely to the sequential diagonal block */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every row minimum is the identity for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* NOTE(review): MatGetRowMax_MPIAIJ below uses PetscMalloc2 here; the zeroing by
     PetscCalloc2 appears unnecessary since every entry is assigned before use — confirm */
  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower; start from an implicit zero */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* fold the stored off-diagonal entries of this row into the running minimum */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine diagonal-block and off-diagonal-block minima; ties keep the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Row-wise maximum of an MPIAIJ matrix; mirror image of MatGetRowMin_MPIAIJ above
  (same structure, comparisons reversed, PETSC_MIN_REAL as the identity).
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (in *newmat) a sequential matrix with the nonzero structure of the whole parallel matrix */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  /* structure only, no values; the single-element array returned is unwrapped and freed here */
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the (point-)block diagonal; purely local, so delegate to the diagonal block */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype; /* propagate any singular-block error */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix with random values; both local blocks are filled, then the matrix is reassembled */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* preallocated but not yet assembled: skip the diagonal-block column range when filling B */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Toggle between the scalable and the default MatIncreaseOverlap implementation */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else
A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* local count = nnz(diagonal block) + nnz(off-diagonal block), read off the CSR row pointers */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process MPIAIJ-specific options; currently only the scalable-overlap switch */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I; ensure a minimal diagonal preallocation exists before delegating to MatShift_Basic */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the new-nonzero policy clobbered by repreallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether a diagonal entry is missing; diagonal entries live entirely in a->A */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Variable-block diagonal inversion; local operation, handled by the diagonal block */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop explicitly stored zeros from both local blocks */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPIAIJ; index comments give the MatOps slot number */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};

/* Stash a copy of the current values of both local blocks (pairs with MatRetrieveValues) */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously saved by MatStoreValues_MPIAIJ */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ
*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Preallocate the two local SeqAIJ blocks: d_nz/d_nnz for the diagonal block b->A,
  o_nz/o_nnz for the off-diagonal block b->B. Any existing blocks, column map,
  ghost vector and scatter are destroyed and rebuilt; the matrix is left unassembled.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode: restore the cached op table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* rebuild the off-diagonal block; on a single rank it has zero columns */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* rebuild the diagonal block: local-rows x local-columns */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset preallocation of both local blocks, keeping the existing nonzero pattern capacity */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Duplicate an MPIAIJ matrix: clones layouts, options and both local blocks.
  cpvalues controls whether values are copied (passed through to the SeqAIJ duplicates).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow work arrays are not duplicated */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 so len==0 still allocates */
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIAIJ matrix from a viewer; dispatches on the viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Load from the PETSc binary format: header (classid, M, N, nz), per-row lengths,
  then column indices and values. Each rank reads its own row slice collectively
  and fills the matrix via MatMPIAIJSetPreallocationCSR.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* negative nz marks a special on-disk format this loader does not understand */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix sum -> local CSR row offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_MAX_INT) { /* PETSC_MAX_INT means the writer did not record a global count */
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree that iscol is the full local column range */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
.
isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
         i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec                x, cmap;
  const PetscInt    *is_idx;
  PetscScalar       *xarray, *cmaparray;
  PetscInt           ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)mat->data;
  Mat                B = a->B;
  Vec                lvec = a->lvec, lcmap;
  PetscInt           i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm           comm;
  VecScatter         Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices (exclusive prefix sum of local iscol sizes) */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries that survived the scatter (> -1 marker) are selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership transfers to the caller, freed there */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed there on initial creation) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk the two sorted global-column maps in lockstep, keeping matched entries */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d,
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the parallel submatrix mat[isrow, iscol].

   Dispatches to one of three implementations:
   - MatCreateSubMatrix_MPIAIJ_SameRowColDist() when both isrow and iscol follow
     the row/column ownership layout of mat;
   - MatCreateSubMatrix_MPIAIJ_SameRowDist() when only isrow matches mat's row
     layout and the gathered iscol is sorted;
   - MatCreateSubMatrix_MPIAIJ_nonscalable() otherwise; this path gathers iscol
     onto every process (memory grows with the global size of iscol), hence
     "nonscalable".

   For MAT_REUSE_MATRIX the path taken by the initial call is recovered from the
   index sets previously composed with *newmat ("isrow_d", "SubIScol", "ISAllGather"). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the distributions match only if they match on every process */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. A - "diagonal" portion of matrix
. B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global indices in place, via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *);

/* Extract mat[isrow, iscol] when isrow follows the row ownership layout of mat.
   iscol_local is the sequential, sorted gather of iscol (it may contain duplicate
   indices); it is only consumed for MAT_INITIAL_MATRIX and may be NULL on reuse. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the helper objects stashed on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; garray is sorted, so advance k to catch up with j */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of
local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate and fill B from local CSR arrays (Ii, J, v); the implementation behind
   MatMPIAIJSetPreallocationCSR(). Also recomputes Aij->ld, the per-row count of
   entries whose column precedes the diagonal block. */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* debug-only validation: each row length nonnegative, first/last column in range */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count diagonal-block vs off-diagonal-block entries per row for preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
  }
  /* every inserted entry is locally owned, so skip the off-process stash during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz; /* advance J to the next row's columns */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering..
i.e for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ for MATMPIAIJ);
     silently does nothing for types that do not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation; a no-op for types without the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4160 4161 Collective 4162 4163 Input Parameters: 4164 + comm - MPI communicator 4165 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4166 . n - This value should be the same as the local size used in creating the 4167 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4168 calculated if `N` is given) For square matrices n is almost always `m`. 4169 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4170 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4171 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4172 . j - global column indices 4173 - a - optional matrix values 4174 4175 Output Parameter: 4176 . mat - the matrix 4177 4178 Level: intermediate 4179 4180 Notes: 4181 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4182 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4183 called this routine. Use `MatCreateMPIAIJWithSplitArray()` to avoid needing to copy the arrays. 4184 4185 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4186 4187 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4188 4189 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4190 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4191 4192 The format which is used for the sparse matrix input, is equivalent to a 4193 row-major ordering.. 
i.e for the following matrix, the input data expected is 4194 as shown 4195 .vb 4196 1 0 0 4197 2 0 3 P0 4198 ------- 4199 4 5 6 P1 4200 4201 Process0 [P0] rows_owned=[0,1] 4202 i = {0,1,3} [size = nrow+1 = 2+1] 4203 j = {0,0,2} [size = 3] 4204 v = {1,2,3} [size = 3] 4205 4206 Process1 [P1] rows_owned=[2] 4207 i = {0,3} [size = nrow+1 = 1+1] 4208 j = {0,1,2} [size = 3] 4209 v = {4,5,6} [size = 3] 4210 .ve 4211 4212 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4213 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4214 @*/ 4215 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4216 { 4217 PetscFunctionBegin; 4218 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4219 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4220 PetscCall(MatCreate(comm, mat)); 4221 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4222 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4223 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4224 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4225 PetscFunctionReturn(PETSC_SUCCESS); 4226 } 4227 4228 /*@ 4229 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4230 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4231 from `MatCreateMPIAIJWithArrays()` 4232 4233 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4234 4235 Collective 4236 4237 Input Parameters: 4238 + mat - the matrix 4239 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4240 . 
n - This value should be the same as the local size used in creating the 4241 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4242 calculated if N is given) For square matrices n is almost always m. 4243 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4244 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4245 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4246 . J - column indices 4247 - v - matrix values 4248 4249 Level: deprecated 4250 4251 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4252 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4253 @*/ 4254 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4255 { 4256 PetscInt nnz, i; 4257 PetscBool nooffprocentries; 4258 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4259 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4260 PetscScalar *ad, *ao; 4261 PetscInt ldi, Iii, md; 4262 const PetscInt *Adi = Ad->i; 4263 PetscInt *ld = Aij->ld; 4264 4265 PetscFunctionBegin; 4266 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4267 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4268 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4269 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4270 4271 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, 
&ad)); 4272 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4273 4274 for (i = 0; i < m; i++) { 4275 if (PetscDefined(USE_DEBUG)) { 4276 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4277 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4278 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4279 } 4280 } 4281 nnz = Ii[i + 1] - Ii[i]; 4282 Iii = Ii[i]; 4283 ldi = ld[i]; 4284 md = Adi[i + 1] - Adi[i]; 4285 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4286 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4287 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4288 ad += md; 4289 ao += nnz - md; 4290 } 4291 nooffprocentries = mat->nooffprocentries; 4292 mat->nooffprocentries = PETSC_TRUE; 4293 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4294 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4296 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4297 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4298 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4299 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4300 mat->nooffprocentries = nooffprocentries; 4301 PetscFunctionReturn(PETSC_SUCCESS); 4302 } 4303 4304 /*@ 4305 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4306 4307 Collective 4308 4309 Input Parameters: 4310 + mat - the matrix 4311 - v - matrix values, stored by row 4312 4313 Level: intermediate 4314 4315 Notes: 4316 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4317 4318 The column indices in the call 
to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4319 4320 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4321 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4322 @*/ 4323 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4324 { 4325 PetscInt nnz, i, m; 4326 PetscBool nooffprocentries; 4327 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4328 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4329 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4330 PetscScalar *ad, *ao; 4331 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4332 PetscInt ldi, Iii, md; 4333 PetscInt *ld = Aij->ld; 4334 4335 PetscFunctionBegin; 4336 m = mat->rmap->n; 4337 4338 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4339 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4340 Iii = 0; 4341 for (i = 0; i < m; i++) { 4342 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4343 ldi = ld[i]; 4344 md = Adi[i + 1] - Adi[i]; 4345 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4346 ad += md; 4347 if (ao) { 4348 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4349 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4350 ao += nnz - md; 4351 } 4352 Iii += nnz; 4353 } 4354 nooffprocentries = mat->nooffprocentries; 4355 mat->nooffprocentries = PETSC_TRUE; 4356 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4357 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4358 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4359 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4360 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4361 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4362 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4363 mat->nooffprocentries = nooffprocentries; 4364 
PetscFunctionReturn(PETSC_SUCCESS); 4365 } 4366 4367 /*@C 4368 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4369 (the default parallel PETSc format). For good matrix assembly performance 4370 the user should preallocate the matrix storage by setting the parameters 4371 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4372 4373 Collective 4374 4375 Input Parameters: 4376 + comm - MPI communicator 4377 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4378 This value should be the same as the local size used in creating the 4379 y vector for the matrix-vector product y = Ax. 4380 . n - This value should be the same as the local size used in creating the 4381 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4382 calculated if N is given) For square matrices n is almost always m. 4383 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4384 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4385 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4386 (same value is used for all local rows) 4387 . d_nnz - array containing the number of nonzeros in the various rows of the 4388 DIAGONAL portion of the local submatrix (possibly different for each row) 4389 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4390 The size of this array is equal to the number of local rows, i.e 'm'. 4391 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4392 submatrix (same value is used for all local rows). 4393 - o_nnz - array containing the number of nonzeros in the various rows of the 4394 OFF-DIAGONAL portion of the local submatrix (possibly different for 4395 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4396 structure. The size of this array is equal to the number 4397 of local rows, i.e 'm'. 4398 4399 Output Parameter: 4400 . 
A - the matrix

  Options Database Keys:
+ -mat_no_inode - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
  See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
  Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2 etc.. where
  m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.
4438 4439 The DIAGONAL portion of the local submatrix on any given processor 4440 is the submatrix corresponding to the rows and columns m,n 4441 corresponding to the given processor. i.e diagonal matrix on 4442 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4443 etc. The remaining portion of the local submatrix [m x (N-n)] 4444 constitute the OFF-DIAGONAL portion. The example below better 4445 illustrates this concept. 4446 4447 For a square global matrix we define each processor's diagonal portion 4448 to be its local rows and the corresponding columns (a square submatrix); 4449 each processor's off-diagonal portion encompasses the remainder of the 4450 local matrix (a rectangular submatrix). 4451 4452 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4453 4454 When calling this routine with a single process communicator, a matrix of 4455 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4456 type of communicator, use the construction mechanism 4457 .vb 4458 MatCreate(..., &A); 4459 MatSetType(A, MATMPIAIJ); 4460 MatSetSizes(A, m, n, M, N); 4461 MatMPIAIJSetPreallocation(A, ...); 4462 .ve 4463 4464 By default, this format uses inodes (identical nodes) when possible. 4465 We search for consecutive rows with the same nonzero structure, thereby 4466 reusing matrix information to achieve increased efficiency. 4467 4468 Example Usage: 4469 Consider the following 8x8 matrix with 34 non-zero values, that is 4470 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4471 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc.
This 4521 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4522 for proc3. i.e we are using 12+15+10=37 storage locations to store 4523 34 values. 4524 4525 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4526 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4527 In the above case the values for d_nnz,o_nnz are 4528 .vb 4529 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4530 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4531 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4532 .ve 4533 Here the space allocated is sum of all the above values i.e 34, and 4534 hence pre-allocation is perfect. 4535 4536 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4537 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4538 @*/ 4539 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4540 { 4541 PetscMPIInt size; 4542 4543 PetscFunctionBegin; 4544 PetscCall(MatCreate(comm, A)); 4545 PetscCall(MatSetSizes(*A, m, n, M, N)); 4546 PetscCallMPI(MPI_Comm_size(comm, &size)); 4547 if (size > 1) { 4548 PetscCall(MatSetType(*A, MATMPIAIJ)); 4549 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4550 } else { 4551 PetscCall(MatSetType(*A, MATSEQAIJ)); 4552 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4553 } 4554 PetscFunctionReturn(PETSC_SUCCESS); 4555 } 4556 4557 /*MC 4558 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4559 4560 Synopsis: 4561 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4562 4563 Not Collective 4564 4565 Input Parameter: 4566 . A - the `MATMPIAIJ` matrix 4567 4568 Output Parameters: 4569 + Ad - the diagonal portion of the matrix 4570 . 
Ao - the off-diagonal portion of the matrix 4571 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4572 - ierr - error code 4573 4574 Level: advanced 4575 4576 Note: 4577 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4578 4579 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4580 M*/ 4581 4582 /*MC 4583 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4584 4585 Synopsis: 4586 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4587 4588 Not Collective 4589 4590 Input Parameters: 4591 + A - the `MATMPIAIJ` matrix 4592 . Ad - the diagonal portion of the matrix 4593 . Ao - the off-diagonal portion of the matrix 4594 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4595 - ierr - error code 4596 4597 Level: advanced 4598 4599 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4600 M*/ 4601 4602 /*@C 4603 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4604 4605 Not Collective 4606 4607 Input Parameter: 4608 . A - The `MATMPIAIJ` matrix 4609 4610 Output Parameters: 4611 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4612 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4613 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4614 4615 Level: intermediate 4616 4617 Note: 4618 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4619 in `Ad` are in [0, Nc) where Nc is the number of local columns. 
The columns are `Ao` are in [0, Nco), where Nco is 4620 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4621 local column numbers to global column numbers in the original matrix. 4622 4623 Fortran Notes: 4624 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4625 4626 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4627 @*/ 4628 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4629 { 4630 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4631 PetscBool flg; 4632 4633 PetscFunctionBegin; 4634 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4635 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4636 if (Ad) *Ad = a->A; 4637 if (Ao) *Ao = a->B; 4638 if (colmap) *colmap = a->garray; 4639 PetscFunctionReturn(PETSC_SUCCESS); 4640 } 4641 4642 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4643 { 4644 PetscInt m, N, i, rstart, nnz, Ii; 4645 PetscInt *indx; 4646 PetscScalar *values; 4647 MatType rootType; 4648 4649 PetscFunctionBegin; 4650 PetscCall(MatGetSize(inmat, &m, &N)); 4651 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4652 PetscInt *dnz, *onz, sum, bs, cbs; 4653 4654 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4655 /* Check sum(n) = N */ 4656 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4657 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4658 4659 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4660 rstart -= m; 4661 4662 MatPreallocateBegin(comm, 
m, n, dnz, onz); 4663 for (i = 0; i < m; i++) { 4664 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4665 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4666 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4667 } 4668 4669 PetscCall(MatCreate(comm, outmat)); 4670 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4671 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4672 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4673 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4674 PetscCall(MatSetType(*outmat, rootType)); 4675 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4676 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4677 MatPreallocateEnd(dnz, onz); 4678 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4679 } 4680 4681 /* numeric phase */ 4682 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4683 for (i = 0; i < m; i++) { 4684 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4685 Ii = i + rstart; 4686 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4687 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4688 } 4689 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4690 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4691 PetscFunctionReturn(PETSC_SUCCESS); 4692 } 4693 4694 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4695 { 4696 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4697 4698 PetscFunctionBegin; 4699 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4700 PetscCall(PetscFree(merge->id_r)); 4701 PetscCall(PetscFree(merge->len_s)); 4702 PetscCall(PetscFree(merge->len_r)); 4703 PetscCall(PetscFree(merge->bi)); 4704 PetscCall(PetscFree(merge->bj)); 4705 PetscCall(PetscFree(merge->buf_ri[0])); 4706 PetscCall(PetscFree(merge->buf_ri)); 4707 PetscCall(PetscFree(merge->buf_rj[0])); 4708 PetscCall(PetscFree(merge->buf_rj)); 4709 
PetscCall(PetscFree(merge->coi)); 4710 PetscCall(PetscFree(merge->coj)); 4711 PetscCall(PetscFree(merge->owners_co)); 4712 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4713 PetscCall(PetscFree(merge)); 4714 PetscFunctionReturn(PETSC_SUCCESS); 4715 } 4716 4717 #include <../src/mat/utils/freespace.h> 4718 #include <petscbt.h> 4719 4720 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4721 { 4722 MPI_Comm comm; 4723 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4724 PetscMPIInt size, rank, taga, *len_s; 4725 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4726 PetscInt proc, m; 4727 PetscInt **buf_ri, **buf_rj; 4728 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4729 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4730 MPI_Request *s_waits, *r_waits; 4731 MPI_Status *status; 4732 const MatScalar *aa, *a_a; 4733 MatScalar **abuf_r, *ba_i; 4734 Mat_Merge_SeqsToMPI *merge; 4735 PetscContainer container; 4736 4737 PetscFunctionBegin; 4738 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4739 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4740 4741 PetscCallMPI(MPI_Comm_size(comm, &size)); 4742 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4743 4744 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4745 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4746 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4747 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4748 aa = a_a; 4749 4750 bi = merge->bi; 4751 bj = merge->bj; 4752 buf_ri = merge->buf_ri; 4753 buf_rj = merge->buf_rj; 4754 4755 PetscCall(PetscMalloc1(size, &status)); 4756 owners = merge->rowmap->range; 4757 len_s = merge->len_s; 4758 4759 /* send and recv matrix values */ 4760 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4761 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, 
merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4762 4763 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4764 for (proc = 0, k = 0; proc < size; proc++) { 4765 if (!len_s[proc]) continue; 4766 i = owners[proc]; 4767 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4768 k++; 4769 } 4770 4771 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4772 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4773 PetscCall(PetscFree(status)); 4774 4775 PetscCall(PetscFree(s_waits)); 4776 PetscCall(PetscFree(r_waits)); 4777 4778 /* insert mat values of mpimat */ 4779 PetscCall(PetscMalloc1(N, &ba_i)); 4780 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4781 4782 for (k = 0; k < merge->nrecv; k++) { 4783 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4784 nrows = *(buf_ri_k[k]); 4785 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4786 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4787 } 4788 4789 /* set values of ba */ 4790 m = merge->rowmap->n; 4791 for (i = 0; i < m; i++) { 4792 arow = owners[rank] + i; 4793 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4794 bnzi = bi[i + 1] - bi[i]; 4795 PetscCall(PetscArrayzero(ba_i, bnzi)); 4796 4797 /* add local non-zero vals of this proc's seqmat into ba */ 4798 anzi = ai[arow + 1] - ai[arow]; 4799 aj = a->j + ai[arow]; 4800 aa = a_a + ai[arow]; 4801 nextaj = 0; 4802 for (j = 0; nextaj < anzi; j++) { 4803 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4804 ba_i[j] += aa[nextaj++]; 4805 } 4806 } 4807 4808 /* add received vals into ba */ 4809 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4810 /* i-th row */ 4811 if (i == *nextrow[k]) { 4812 anzi = *(nextai[k] + 1) - *nextai[k]; 4813 aj = buf_rj[k] + *(nextai[k]); 4814 aa = abuf_r[k] + 
*(nextai[k]); 4815 nextaj = 0; 4816 for (j = 0; nextaj < anzi; j++) { 4817 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4818 ba_i[j] += aa[nextaj++]; 4819 } 4820 } 4821 nextrow[k]++; 4822 nextai[k]++; 4823 } 4824 } 4825 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4826 } 4827 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4828 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4829 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4830 4831 PetscCall(PetscFree(abuf_r[0])); 4832 PetscCall(PetscFree(abuf_r)); 4833 PetscCall(PetscFree(ba_i)); 4834 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4835 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4836 PetscFunctionReturn(PETSC_SUCCESS); 4837 } 4838 4839 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4840 { 4841 Mat B_mpi; 4842 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4843 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4844 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4845 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4846 PetscInt len, proc, *dnz, *onz, bs, cbs; 4847 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4848 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4849 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4850 MPI_Status *status; 4851 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4852 PetscBT lnkbt; 4853 Mat_Merge_SeqsToMPI *merge; 4854 PetscContainer container; 4855 4856 PetscFunctionBegin; 4857 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4858 4859 /* make sure it is a PETSc comm */ 4860 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4861 PetscCallMPI(MPI_Comm_size(comm, &size)); 4862 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4863 4864 PetscCall(PetscNew(&merge)); 4865 PetscCall(PetscMalloc1(size, &status)); 4866 4867 /* determine row ownership */ 
4868 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4869 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4870 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4871 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4872 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4873 PetscCall(PetscMalloc1(size, &len_si)); 4874 PetscCall(PetscMalloc1(size, &merge->len_s)); 4875 4876 m = merge->rowmap->n; 4877 owners = merge->rowmap->range; 4878 4879 /* determine the number of messages to send, their lengths */ 4880 len_s = merge->len_s; 4881 4882 len = 0; /* length of buf_si[] */ 4883 merge->nsend = 0; 4884 for (proc = 0; proc < size; proc++) { 4885 len_si[proc] = 0; 4886 if (proc == rank) { 4887 len_s[proc] = 0; 4888 } else { 4889 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4890 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4891 } 4892 if (len_s[proc]) { 4893 merge->nsend++; 4894 nrows = 0; 4895 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4896 if (ai[i + 1] > ai[i]) nrows++; 4897 } 4898 len_si[proc] = 2 * (nrows + 1); 4899 len += len_si[proc]; 4900 } 4901 } 4902 4903 /* determine the number and length of messages to receive for ij-structure */ 4904 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4905 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4906 4907 /* post the Irecv of j-structure */ 4908 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4909 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4910 4911 /* post the Isend of j-structure */ 4912 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4913 4914 for (proc = 0, k = 0; proc < size; proc++) { 4915 if (!len_s[proc]) continue; 4916 i = owners[proc]; 4917 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4918 k++; 4919 } 4920 4921 
/* receives and sends of j-structure are complete */ 4922 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4923 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4924 4925 /* send and recv i-structure */ 4926 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4927 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4928 4929 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4930 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4931 for (proc = 0, k = 0; proc < size; proc++) { 4932 if (!len_s[proc]) continue; 4933 /* form outgoing message for i-structure: 4934 buf_si[0]: nrows to be sent 4935 [1:nrows]: row index (global) 4936 [nrows+1:2*nrows+1]: i-structure index 4937 */ 4938 nrows = len_si[proc] / 2 - 1; 4939 buf_si_i = buf_si + nrows + 1; 4940 buf_si[0] = nrows; 4941 buf_si_i[0] = 0; 4942 nrows = 0; 4943 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4944 anzi = ai[i + 1] - ai[i]; 4945 if (anzi) { 4946 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4947 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4948 nrows++; 4949 } 4950 } 4951 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4952 k++; 4953 buf_si += len_si[proc]; 4954 } 4955 4956 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4957 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4958 4959 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4960 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4961 4962 PetscCall(PetscFree(len_si)); 4963 PetscCall(PetscFree(len_ri)); 4964 PetscCall(PetscFree(rj_waits)); 4965 PetscCall(PetscFree2(si_waits, sj_waits)); 4966 PetscCall(PetscFree(ri_waits)); 4967 PetscCall(PetscFree(buf_s)); 4968 
PetscCall(PetscFree(status)); 4969 4970 /* compute a local seq matrix in each processor */ 4971 /* allocate bi array and free space for accumulating nonzero column info */ 4972 PetscCall(PetscMalloc1(m + 1, &bi)); 4973 bi[0] = 0; 4974 4975 /* create and initialize a linked list */ 4976 nlnk = N + 1; 4977 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4978 4979 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4980 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4981 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4982 4983 current_space = free_space; 4984 4985 /* determine symbolic info for each local row */ 4986 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4987 4988 for (k = 0; k < merge->nrecv; k++) { 4989 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4990 nrows = *buf_ri_k[k]; 4991 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4992 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4993 } 4994 4995 MatPreallocateBegin(comm, m, n, dnz, onz); 4996 len = 0; 4997 for (i = 0; i < m; i++) { 4998 bnzi = 0; 4999 /* add local non-zero cols of this proc's seqmat into lnk */ 5000 arow = owners[rank] + i; 5001 anzi = ai[arow + 1] - ai[arow]; 5002 aj = a->j + ai[arow]; 5003 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5004 bnzi += nlnk; 5005 /* add received col data into lnk */ 5006 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5007 if (i == *nextrow[k]) { /* i-th row */ 5008 anzi = *(nextai[k] + 1) - *nextai[k]; 5009 aj = buf_rj[k] + *nextai[k]; 5010 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5011 bnzi += nlnk; 5012 nextrow[k]++; 5013 nextai[k]++; 5014 } 5015 } 5016 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5017 5018 /* if free space is not available, make more free space */ 5019 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5020 /* copy data into free space, then initialize lnk */ 5021 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5022 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5023 5024 current_space->array += bnzi; 5025 current_space->local_used += bnzi; 5026 current_space->local_remaining -= bnzi; 5027 5028 bi[i + 1] = bi[i] + bnzi; 5029 } 5030 5031 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5032 5033 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5034 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5035 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5036 5037 /* create symbolic parallel matrix B_mpi */ 5038 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5039 PetscCall(MatCreate(comm, &B_mpi)); 5040 if (n == PETSC_DECIDE) { 5041 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5042 } else { 5043 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5044 } 5045 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5046 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5047 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5048 MatPreallocateEnd(dnz, onz); 5049 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5050 5051 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5052 B_mpi->assembled = PETSC_FALSE; 5053 merge->bi = bi; 5054 merge->bj = bj; 5055 merge->buf_ri = buf_ri; 5056 merge->buf_rj = buf_rj; 5057 merge->coi = NULL; 5058 merge->coj = NULL; 5059 merge->owners_co = NULL; 5060 5061 PetscCall(PetscCommDestroy(&comm)); 5062 5063 /* attach the supporting struct to B_mpi for reuse */ 5064 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5065 PetscCall(PetscContainerSetPointer(container, merge)); 5066 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5067 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5068 PetscCall(PetscContainerDestroy(&container)); 5069 *mpimat = B_mpi; 5070 5071 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5072 PetscFunctionReturn(PETSC_SUCCESS); 5073 } 5074 5075 /*@C 5076 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5077 matrices from each processor 5078 5079 Collective 5080 5081 Input Parameters: 5082 + comm - the communicators the parallel matrix will live on 5083 . seqmat - the input sequential matrices 5084 . m - number of local rows (or `PETSC_DECIDE`) 5085 . n - number of local columns (or `PETSC_DECIDE`) 5086 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5087 5088 Output Parameter: 5089 . mpimat - the parallel matrix generated 5090 5091 Level: advanced 5092 5093 Note: 5094 The dimensions of the sequential matrix in each processor MUST be the same. 5095 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5096 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5097 5098 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5099 @*/ 5100 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5101 { 5102 PetscMPIInt size; 5103 5104 PetscFunctionBegin; 5105 PetscCallMPI(MPI_Comm_size(comm, &size)); 5106 if (size == 1) { 5107 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5108 if (scall == MAT_INITIAL_MATRIX) { 5109 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5110 } else { 5111 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5112 } 5113 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5114 PetscFunctionReturn(PETSC_SUCCESS); 5115 } 5116 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5117 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5118 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5119 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5120 PetscFunctionReturn(PETSC_SUCCESS); 5121 } 5122 5123 /*@ 5124 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5125 5126 Not Collective 5127 5128 Input Parameter: 5129 . A - the matrix 5130 5131 Output Parameter: 5132 . A_loc - the local sequential matrix generated 5133 5134 Level: developer 5135 5136 Notes: 5137 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5138 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5139 `n` is the global column count obtained with `MatGetSize()` 5140 5141 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5142 5143 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5144 5145 Destroy the matrix with `MatDestroy()` 5146 5147 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5148 @*/ 5149 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5150 { 5151 PetscBool mpi; 5152 5153 PetscFunctionBegin; 5154 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5155 if (mpi) { 5156 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5157 } else { 5158 *A_loc = A; 5159 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5160 } 5161 PetscFunctionReturn(PETSC_SUCCESS); 5162 } 5163 5164 /*@ 5165 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5166 5167 Not Collective 5168 5169 Input Parameters: 5170 + A - the matrix 5171 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5172 5173 Output Parameter: 5174 . A_loc - the local sequential matrix generated 5175 5176 Level: developer 5177 5178 Notes: 5179 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5180 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5181 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5182 5183 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5184 5185 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5186 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5187 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5188 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray maps local off-diag columns to global columns */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* PetscStrbeginswith so derived MPIAIJ types (e.g. mpiaijcusparse) also pass */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block IS the whole matrix (see manpage note above) */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data; /* diagonal block */
  b  = (Mat_SeqAIJ *)(mpimat->B)->data; /* off-diagonal block */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba and aj/bj are advanced as moving cursors through both CSR structures below */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds all of A's and B's entries for that row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      /* emit off-diag entries whose global column precedes the diagonal block (cmap[] < cstart) first,
         so the merged row stays in ascending global column order */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      /* remaining off-diag entries lie to the right of the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists; only refresh the values in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ`
matrix by taking all its local rows and putting them into a sequential matrix with 5299 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5300 5301 Not Collective 5302 5303 Input Parameters: 5304 + A - the matrix 5305 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5306 5307 Output Parameters: 5308 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5309 - A_loc - the local sequential matrix generated 5310 5311 Level: developer 5312 5313 Note: 5314 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5315 part, then those associated with the off-diagonal part (in its local ordering) 5316 5317 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5318 @*/ 5319 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5320 { 5321 Mat Ao, Ad; 5322 const PetscInt *cmap; 5323 PetscMPIInt size; 5324 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5325 5326 PetscFunctionBegin; 5327 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5328 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5329 if (size == 1) { 5330 if (scall == MAT_INITIAL_MATRIX) { 5331 PetscCall(PetscObjectReference((PetscObject)Ad)); 5332 *A_loc = Ad; 5333 } else if (scall == MAT_REUSE_MATRIX) { 5334 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5335 } 5336 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5337 PetscFunctionReturn(PETSC_SUCCESS); 5338 } 5339 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5340 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5341 if (f) { 5342 PetscCall((*f)(A, scall, glob, A_loc)); 5343 } else { 5344 
Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5345 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5346 Mat_SeqAIJ *c; 5347 PetscInt *ai = a->i, *aj = a->j; 5348 PetscInt *bi = b->i, *bj = b->j; 5349 PetscInt *ci, *cj; 5350 const PetscScalar *aa, *ba; 5351 PetscScalar *ca; 5352 PetscInt i, j, am, dn, on; 5353 5354 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5355 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5356 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5357 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5358 if (scall == MAT_INITIAL_MATRIX) { 5359 PetscInt k; 5360 PetscCall(PetscMalloc1(1 + am, &ci)); 5361 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5362 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5363 ci[0] = 0; 5364 for (i = 0, k = 0; i < am; i++) { 5365 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5366 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5367 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5368 /* diagonal portion of A */ 5369 for (j = 0; j < ncols_d; j++, k++) { 5370 cj[k] = *aj++; 5371 ca[k] = *aa++; 5372 } 5373 /* off-diagonal portion of A */ 5374 for (j = 0; j < ncols_o; j++, k++) { 5375 cj[k] = dn + *bj++; 5376 ca[k] = *ba++; 5377 } 5378 } 5379 /* put together the new matrix */ 5380 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5381 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5382 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5383 c = (Mat_SeqAIJ *)(*A_loc)->data; 5384 c->free_a = PETSC_TRUE; 5385 c->free_ij = PETSC_TRUE; 5386 c->nonew = 0; 5387 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5388 } else if (scall == MAT_REUSE_MATRIX) { 5389 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5390 for (i = 0; i < am; i++) { 5391 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5392 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5393 /* diagonal portion of A */ 5394 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5395 /* off-diagonal portion of A */ 5396 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5397 } 5398 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5399 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5400 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5401 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5402 if (glob) { 5403 PetscInt cst, *gidx; 5404 5405 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5406 PetscCall(PetscMalloc1(dn + on, &gidx)); 5407 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5408 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5409 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5410 } 5411 } 5412 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5413 PetscFunctionReturn(PETSC_SUCCESS); 5414 } 5415 5416 /*@C 5417 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5418 5419 Not Collective 5420 5421 Input Parameters: 5422 + A - the matrix 5423 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5424 . row - index set of rows to extract (or `NULL`) 5425 - col - index set of columns to extract (or `NULL`) 5426 5427 Output Parameter: 5428 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns = off-diag columns below cstart,
       then the owned (diagonal) columns, then the remaining off-diag columns,
       which keeps the list in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root (row) counts and offsets, interleaved as (diag, offdiag) pairs */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs move per-nonzero data (column indices and values) from P to P_oth */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  /* NOTE: pd->j and po->j are translated IN PLACE for the duration of the broadcasts and
     restored afterwards; the Begin/End interleaving below is therefore order-critical */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place global translation of po->j */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      /* dof > 1 (MAIJ case) collapses dof consecutive columns onto one key */
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5701 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5702 PetscCall(PetscCalloc1(htsize, &rowindices)); 5703 off = 0; 5704 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5705 PetscCall(PetscHMapIDestroy(&hamp)); 5706 PetscCall(PetscSortInt(htsize, rowindices)); 5707 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5708 /* In case, the matrix was already created but users want to recreate the matrix */ 5709 PetscCall(MatDestroy(P_oth)); 5710 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5711 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5712 PetscCall(ISDestroy(&map)); 5713 PetscCall(ISDestroy(&rows)); 5714 } else if (reuse == MAT_REUSE_MATRIX) { 5715 /* If matrix was already created, we simply update values using SF objects 5716 * that as attached to the matrix earlier. 
5717 */ 5718 const PetscScalar *pd_a, *po_a; 5719 5720 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5721 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5722 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5723 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5724 /* Update values in place */ 5725 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5726 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5727 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5728 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5729 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5730 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5731 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5732 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5733 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5734 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5735 PetscFunctionReturn(PETSC_SUCCESS); 5736 } 5737 5738 /*@C 5739 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5740 5741 Collective 5742 5743 Input Parameters: 5744 + A - the first matrix in `MATMPIAIJ` format 5745 . B - the second matrix in `MATMPIAIJ` format 5746 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5747 5748 Output Parameters: 5749 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5750 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS as the union of A's off-diagonal columns (a->garray, sorted) and A's
       local (diagonal-block) columns, merged in ascending order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* First garray entry that lies beyond the local column range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    /* PETSC_OWN_POINTER: isrowb takes ownership of idx[] */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices expects an array of Mat for reuse; wrap the single matrix */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller when requested; otherwise destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..

  Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;
  const PetscInt    *srow, *rstarts, *sstarts;
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  if (size == 1) {
    /* NOTE(review): these assign to the local copies of the output pointer parameters, so the
       caller's startsj_s/bufa_ptr are NOT cleared here — only *B_oth is. Confirm this is intended. */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
  PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
  PetscCall(PetscMalloc1(nreqs, &reqs));
  /* Single request array: receives first, sends after */
  rwaits = reqs;
  swaits = PetscSafePointerPlusOffset(reqs, nrecvs);

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; /* Cannot reuse without the saved offsets/buffer */
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /* post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i = 0; i < nrecvs; i++) {
      /* NOTE(review): this indexes rvalues by rstarts[i]*rbs while the allocation and the re-scan
         below use (rstarts[i] - rstarts[0])*rbs; if rstarts[0] != 0 (the comment below says it can
         be) this would write out of bounds — confirm against upstream */
      rowlen = rvalues + rstarts[i] * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
    }
    for (i = 0; i < nsends; i++) {
      rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
      nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      for (j = 0; j < nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l = 0; l < sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */

          rowlen[j * sbs + l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));

      sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len + 1, &bufj));
    PetscCall(PetscMalloc1(len + 1, &bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn + 2, &b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
      for (j = 0; j < nrows; j++) {
        b_othi[k + 1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j], len, &len)); /* overflow-checked accumulation */
        k++;
      }
      rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));

    /* j-array */
    /* post receives of j-array */
    for (i = 0; i < nrecvs; i++) {
      nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i = 0; i < nsends; i++) {
      nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      bufJ  = bufj + sstartsj[i];
      for (j = 0; j < nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll = 0; ll < sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
          for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
          PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /* post receives of a-array */
  for (i = 0; i < nrecvs; i++) {
    nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i = 0; i < nsends; i++) {
    nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
    bufA  = bufa + sstartsj[i];
    for (j = 0; j < nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll = 0; ll < sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
        for (l = 0; l < ncols; l++) *bufA++ = vals[l];
        PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj, rstartsj));
      /* NOTE(review): this frees bufa_ptr (an output pointer parameter, NULL or caller-owned) rather
         than bufa, which looks like it leaks bufa on this path — confirm against upstream */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType,
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  [       ]       [       ]         [                 ]
m [   A   ]  *  n [   B   ]   =   m [        C        ]
  [       ]       [       ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form explicit transposes, multiply them (AIJ*Dense is supported), then transpose back into C */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Declare C as the destination of Ct's transpose so MAT_REUSE_MATRIX is legal below */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: size/type C and install the numeric kernel; 'fill' is unused by this implementation */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already some dense variant; otherwise inherit A's (dense) type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Hook the Dense*AIJ AB product into C's ops table after a compatibility check */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatcher: only the AB product type is supported for MPIDense*MPIAIJ; others are left unset */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge over sorted (possibly repeated) column indices of the two sets */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer for the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
  mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
  n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
    respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

    i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
    i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
  j[],perm[]: the routine needs to sort j[] within each row along with perm[].
  rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
    They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
    and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

  Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
    Atot: number of entries belonging to the diagonal block.
    Annz: number of unique nonzeros belonging to the diagonal block.
    Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
      repeats (i.e., same 'i,j' pair).
    Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
 More precisely, Ajmap[t+1] - Ajmap[t]
      is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

  Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

  Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): upper bound admits j[p] == mat->cmap->N, one past the last valid column [0, N) — confirm intended */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sorting now puts all (shifted, negative) diag entries before offdiag entries; perm[] follows j[] */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Counters are reused as running offsets for the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6382 } 6383 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6384 PetscFunctionReturn(PETSC_SUCCESS); 6385 } 6386 6387 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6388 { 6389 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6390 6391 PetscFunctionBegin; 6392 PetscCall(PetscSFDestroy(&coo->sf)); 6393 PetscCall(PetscFree(coo->Aperm1)); 6394 PetscCall(PetscFree(coo->Bperm1)); 6395 PetscCall(PetscFree(coo->Ajmap1)); 6396 PetscCall(PetscFree(coo->Bjmap1)); 6397 PetscCall(PetscFree(coo->Aimap2)); 6398 PetscCall(PetscFree(coo->Bimap2)); 6399 PetscCall(PetscFree(coo->Aperm2)); 6400 PetscCall(PetscFree(coo->Bperm2)); 6401 PetscCall(PetscFree(coo->Ajmap2)); 6402 PetscCall(PetscFree(coo->Bjmap2)); 6403 PetscCall(PetscFree(coo->Cperm1)); 6404 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6405 PetscCall(PetscFree(coo)); 6406 PetscFunctionReturn(PETSC_SUCCESS); 6407 } 6408 6409 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6410 { 6411 MPI_Comm comm; 6412 PetscMPIInt rank, size; 6413 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6414 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6415 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6416 PetscContainer container; 6417 MatCOOStruct_MPIAIJ *coo; 6418 6419 PetscFunctionBegin; 6420 PetscCall(PetscFree(mpiaij->garray)); 6421 PetscCall(VecDestroy(&mpiaij->lvec)); 6422 #if defined(PETSC_USE_CTABLE) 6423 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6424 #else 6425 PetscCall(PetscFree(mpiaij->colmap)); 6426 #endif 6427 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6428 mat->assembled = PETSC_FALSE; 6429 mat->was_assembled = PETSC_FALSE; 6430 6431 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6432 PetscCallMPI(MPI_Comm_size(comm, &size)); 6433 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6434 PetscCall(PetscLayoutSetUp(mat->rmap)); 
6435 PetscCall(PetscLayoutSetUp(mat->cmap)); 6436 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6437 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6438 PetscCall(MatGetLocalSize(mat, &m, &n)); 6439 PetscCall(MatGetSize(mat, &M, &N)); 6440 6441 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6442 /* entries come first, then local rows, then remote rows. */ 6443 PetscCount n1 = coo_n, *perm1; 6444 PetscInt *i1 = coo_i, *j1 = coo_j; 6445 6446 PetscCall(PetscMalloc1(n1, &perm1)); 6447 for (k = 0; k < n1; k++) perm1[k] = k; 6448 6449 /* Manipulate indices so that entries with negative row or col indices will have smallest 6450 row indices, local entries will have greater but negative row indices, and remote entries 6451 will have positive row indices. 6452 */ 6453 for (k = 0; k < n1; k++) { 6454 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6455 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6456 else { 6457 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6458 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6459 } 6460 } 6461 6462 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6463 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6464 6465 /* Advance k to the first entry we need to take care of */ 6466 for (k = 0; k < n1; k++) 6467 if (i1[k] > PETSC_MIN_INT) break; 6468 PetscInt i1start = k; 6469 6470 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6471 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6472 6473 /* Send remote rows 
to their owner */ 6474 /* Find which rows should be sent to which remote ranks*/ 6475 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6476 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6477 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6478 const PetscInt *ranges; 6479 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6480 6481 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6482 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6483 for (k = rem; k < n1;) { 6484 PetscMPIInt owner; 6485 PetscInt firstRow, lastRow; 6486 6487 /* Locate a row range */ 6488 firstRow = i1[k]; /* first row of this owner */ 6489 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6490 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6491 6492 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6493 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6494 6495 /* All entries in [k,p) belong to this remote owner */ 6496 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6497 PetscMPIInt *sendto2; 6498 PetscInt *nentries2; 6499 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6500 6501 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6502 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6503 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6504 PetscCall(PetscFree2(sendto, nentries2)); 6505 sendto = sendto2; 6506 nentries = nentries2; 6507 maxNsend = maxNsend2; 6508 } 6509 sendto[nsend] = owner; 6510 nentries[nsend] = p - k; 6511 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6512 nsend++; 6513 k = p; 6514 } 6515 6516 /* Build 1st SF to know offsets on remote to send data */ 6517 PetscSF sf1; 6518 PetscInt nroots = 1, nroots2 = 0; 6519 PetscInt nleaves = nsend, nleaves2 = 0; 6520 PetscInt *offsets; 6521 PetscSFNode *iremote; 6522 6523 PetscCall(PetscSFCreate(comm, &sf1)); 6524 PetscCall(PetscMalloc1(nsend, &iremote)); 6525 PetscCall(PetscMalloc1(nsend, &offsets)); 6526 for (k = 0; k < nsend; k++) { 6527 iremote[k].rank = sendto[k]; 6528 iremote[k].index = 0; 6529 nleaves2 += nentries[k]; 6530 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6531 } 6532 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6533 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6534 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6535 PetscCall(PetscSFDestroy(&sf1)); 6536 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6537 6538 /* Build 2nd SF to send remote COOs to their owner */ 6539 PetscSF sf2; 6540 nroots = nroots2; 6541 nleaves = nleaves2; 6542 PetscCall(PetscSFCreate(comm, &sf2)); 6543 
PetscCall(PetscSFSetFromOptions(sf2)); 6544 PetscCall(PetscMalloc1(nleaves, &iremote)); 6545 p = 0; 6546 for (k = 0; k < nsend; k++) { 6547 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6548 for (q = 0; q < nentries[k]; q++, p++) { 6549 iremote[p].rank = sendto[k]; 6550 iremote[p].index = offsets[k] + q; 6551 } 6552 } 6553 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6554 6555 /* Send the remote COOs to their owner */ 6556 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6557 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6558 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6559 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6562 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6563 6564 PetscCall(PetscFree(offsets)); 6565 PetscCall(PetscFree2(sendto, nentries)); 6566 6567 /* Sort received COOs by row along with the permutation array */ 6568 for (k = 0; k < n2; k++) perm2[k] = k; 6569 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6570 6571 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6572 PetscCount *Cperm1; 6573 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6574 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6575 6576 /* Support for HYPRE matrices, kind of a hack. 
6577 Swap min column with diagonal so that diagonal values will go first */ 6578 PetscBool hypre; 6579 const char *name; 6580 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6581 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6582 if (hypre) { 6583 PetscInt *minj; 6584 PetscBT hasdiag; 6585 6586 PetscCall(PetscBTCreate(m, &hasdiag)); 6587 PetscCall(PetscMalloc1(m, &minj)); 6588 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6589 for (k = i1start; k < rem; k++) { 6590 if (j1[k] < cstart || j1[k] >= cend) continue; 6591 const PetscInt rindex = i1[k] - rstart; 6592 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6593 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6594 } 6595 for (k = 0; k < n2; k++) { 6596 if (j2[k] < cstart || j2[k] >= cend) continue; 6597 const PetscInt rindex = i2[k] - rstart; 6598 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6599 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6600 } 6601 for (k = i1start; k < rem; k++) { 6602 const PetscInt rindex = i1[k] - rstart; 6603 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6604 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6605 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6606 } 6607 for (k = 0; k < n2; k++) { 6608 const PetscInt rindex = i2[k] - rstart; 6609 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6610 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6611 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6612 } 6613 PetscCall(PetscBTDestroy(&hasdiag)); 6614 PetscCall(PetscFree(minj)); 6615 } 6616 6617 /* Split local COOs and received COOs into diag/offdiag portions */ 6618 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6619 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6620 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6621 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6622 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6623 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6624 6625 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6626 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6627 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6628 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6629 6630 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6631 PetscInt *Ai, *Bi; 6632 PetscInt *Aj, *Bj; 6633 6634 PetscCall(PetscMalloc1(m + 1, &Ai)); 6635 PetscCall(PetscMalloc1(m + 1, &Bi)); 6636 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6637 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6638 6639 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6640 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6641 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6642 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6643 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6644 6645 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6646 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6647 6648 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6649 /* expect nonzeros in A/B most likely have local contributing entries */ 6650 PetscInt Annz = Ai[m]; 6651 PetscInt Bnnz = Bi[m]; 6652 PetscCount *Ajmap1_new, *Bjmap1_new; 6653 6654 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6655 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6656 6657 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6658 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6659 
6660 PetscCall(PetscFree(Aimap1)); 6661 PetscCall(PetscFree(Ajmap1)); 6662 PetscCall(PetscFree(Bimap1)); 6663 PetscCall(PetscFree(Bjmap1)); 6664 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6665 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6666 PetscCall(PetscFree(perm1)); 6667 PetscCall(PetscFree3(i2, j2, perm2)); 6668 6669 Ajmap1 = Ajmap1_new; 6670 Bjmap1 = Bjmap1_new; 6671 6672 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6673 if (Annz < Annz1 + Annz2) { 6674 PetscInt *Aj_new; 6675 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6676 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6677 PetscCall(PetscFree(Aj)); 6678 Aj = Aj_new; 6679 } 6680 6681 if (Bnnz < Bnnz1 + Bnnz2) { 6682 PetscInt *Bj_new; 6683 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6684 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6685 PetscCall(PetscFree(Bj)); 6686 Bj = Bj_new; 6687 } 6688 6689 /* Create new submatrices for on-process and off-process coupling */ 6690 PetscScalar *Aa, *Ba; 6691 MatType rtype; 6692 Mat_SeqAIJ *a, *b; 6693 PetscObjectState state; 6694 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6695 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6696 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6697 if (cstart) { 6698 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6699 } 6700 6701 PetscCall(MatGetRootType_Private(mat, &rtype)); 6702 6703 MatSeqXAIJGetOptions_Private(mpiaij->A); 6704 PetscCall(MatDestroy(&mpiaij->A)); 6705 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6706 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6707 6708 MatSeqXAIJGetOptions_Private(mpiaij->B); 6709 PetscCall(MatDestroy(&mpiaij->B)); 6710 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6711 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6712 6713 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6714 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means 
the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6715 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6716 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6717 6718 a = (Mat_SeqAIJ *)mpiaij->A->data; 6719 b = (Mat_SeqAIJ *)mpiaij->B->data; 6720 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6721 a->free_a = b->free_a = PETSC_TRUE; 6722 a->free_ij = b->free_ij = PETSC_TRUE; 6723 6724 /* conversion must happen AFTER multiply setup */ 6725 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6726 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6727 PetscCall(VecDestroy(&mpiaij->lvec)); 6728 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6729 6730 // Put the COO struct in a container and then attach that to the matrix 6731 PetscCall(PetscMalloc1(1, &coo)); 6732 coo->n = coo_n; 6733 coo->sf = sf2; 6734 coo->sendlen = nleaves; 6735 coo->recvlen = nroots; 6736 coo->Annz = Annz; 6737 coo->Bnnz = Bnnz; 6738 coo->Annz2 = Annz2; 6739 coo->Bnnz2 = Bnnz2; 6740 coo->Atot1 = Atot1; 6741 coo->Atot2 = Atot2; 6742 coo->Btot1 = Btot1; 6743 coo->Btot2 = Btot2; 6744 coo->Ajmap1 = Ajmap1; 6745 coo->Aperm1 = Aperm1; 6746 coo->Bjmap1 = Bjmap1; 6747 coo->Bperm1 = Bperm1; 6748 coo->Aimap2 = Aimap2; 6749 coo->Ajmap2 = Ajmap2; 6750 coo->Aperm2 = Aperm2; 6751 coo->Bimap2 = Bimap2; 6752 coo->Bjmap2 = Bjmap2; 6753 coo->Bperm2 = Bperm2; 6754 coo->Cperm1 = Cperm1; 6755 // Allocate in preallocation. 
// If not used, it has zero cost on host
PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
PetscCall(PetscContainerSetPointer(container, coo));
PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
PetscCall(PetscContainerDestroy(&container));
PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert numerical values, in the COO layout established by MatSetPreallocationCOO_MPIAIJ(),
   into the diagonal (A) and off-diagonal (B) blocks of an MPIAIJ matrix.

   v     - values, in the order of the (i,j) pairs given at preallocation time
   imode - INSERT_VALUES (overwrite) or ADD_VALUES (accumulate)

   Off-process entries are shipped to their owners through the PetscSF built at
   preallocation; that communication is overlapped with the local summation below. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Retrieve the COO maps attached to the matrix by MatSetPreallocationCOO_MPIAIJ() */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the function table,
   creates the stash for off-process entries, and registers the type-specific methods
   (conversions, products, COO assembly) queried elsewhere by name */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
.
n - This value should be the same as the local size used in creating the 6940 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6941 calculated if `N` is given) For square matrices `n` is almost always `m`. 6942 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6943 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6944 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6945 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6946 . a - matrix values 6947 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6948 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6949 - oa - matrix values 6950 6951 Output Parameter: 6952 . mat - the matrix 6953 6954 Level: advanced 6955 6956 Notes: 6957 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6958 must free the arrays once the matrix has been destroyed and not before. 6959 6960 The `i` and `j` indices are 0 based 6961 6962 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6963 6964 This sets local rows and cannot be used to set off-processor values. 6965 6966 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6967 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6968 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6969 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6970 keep track of the underlying array. 
Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* the user-provided CSR arrays serve as the storage, so the matrix is preallocated */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the two CSR triplets: A (diagonal block, local column indices) and
     B (off-diagonal block, global column indices) -- arrays are NOT copied */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local rows are set here, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-product data for the "backend" MatProduct implementation: a set of intermediate
   sequential products plus the index maps used to scatter their values into C via COO */
typedef struct {
  Mat *mp; /* intermediate products */
  PetscBool
*mptmp;      /* is the intermediate product temporary ? */
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Release all storage held by a MatMatMPIAIJBACKEND structure.
   Installed as the destroy callback for C->product->data. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w are released through the SF with the recorded memtype (they may be device memory);
     this must happen before the SF itself is destroyed */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[i]/off[i] apparently point into single contiguous buffers anchored at own[0]/off[0],
     so only the anchors are freed -- confirm against the symbolic phase that fills them */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* gather the n selected entries */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* idx == NULL (or n == 0): copy the first n entries verbatim */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: refresh the temporaries if needed, run the
   numeric phase of every intermediate product, then gather their values and insert
   them into C through MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* any later numeric call must refresh the temporaries */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Collect values of the non-temporary products: off-process entries into coo_w, on-process into coo_v */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      /* no off-process entries for this product: take its whole value array */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: append the gathered remote values after the local ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7149 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7150 /* a base offset; type-2: sparse with a local to global map table */ 7151 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7152 7153 MatProductType ptype; 7154 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7155 PetscMPIInt size; 7156 7157 PetscFunctionBegin; 7158 MatCheckProduct(C, 1); 7159 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7160 ptype = product->type; 7161 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7162 ptype = MATPRODUCT_AB; 7163 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7164 } 7165 switch (ptype) { 7166 case MATPRODUCT_AB: 7167 A = product->A; 7168 P = product->B; 7169 m = A->rmap->n; 7170 n = P->cmap->n; 7171 M = A->rmap->N; 7172 N = P->cmap->N; 7173 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7174 break; 7175 case MATPRODUCT_AtB: 7176 P = product->A; 7177 A = product->B; 7178 m = P->cmap->n; 7179 n = A->cmap->n; 7180 M = P->cmap->N; 7181 N = A->cmap->N; 7182 hasoffproc = PETSC_TRUE; 7183 break; 7184 case MATPRODUCT_PtAP: 7185 A = product->A; 7186 P = product->B; 7187 m = P->cmap->n; 7188 n = P->cmap->n; 7189 M = P->cmap->N; 7190 N = P->cmap->N; 7191 hasoffproc = PETSC_TRUE; 7192 break; 7193 default: 7194 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7195 } 7196 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7197 if (size == 1) hasoffproc = PETSC_FALSE; 7198 7199 /* defaults */ 7200 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7201 mp[i] = NULL; 7202 mptmp[i] = PETSC_FALSE; 7203 rmapt[i] = -1; 7204 cmapt[i] = -1; 7205 rmapa[i] = NULL; 7206 cmapa[i] = NULL; 7207 } 7208 7209 /* customization */ 
7210 PetscCall(PetscNew(&mmdata)); 7211 mmdata->reusesym = product->api_user; 7212 if (ptype == MATPRODUCT_AB) { 7213 if (product->api_user) { 7214 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7215 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7216 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7217 PetscOptionsEnd(); 7218 } else { 7219 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7220 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7221 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7222 PetscOptionsEnd(); 7223 } 7224 } else if (ptype == MATPRODUCT_PtAP) { 7225 if (product->api_user) { 7226 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7227 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7228 PetscOptionsEnd(); 7229 } else { 7230 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7231 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7232 PetscOptionsEnd(); 7233 } 7234 } 7235 a = (Mat_MPIAIJ *)A->data; 7236 p = (Mat_MPIAIJ *)P->data; 7237 PetscCall(MatSetSizes(C, m, n, M, N)); 7238 PetscCall(PetscLayoutSetUp(C->rmap)); 7239 PetscCall(PetscLayoutSetUp(C->cmap)); 7240 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7241 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7242 7243 cp = 0; 7244 switch (ptype) { 7245 case MATPRODUCT_AB: /* A * P */ 7246 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7247 7248 /* A_diag * P_local (merged or not) */ 7249 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7250 /* P is product->B */ 7251 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7252 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7253 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7254 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7255 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7256 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7257 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7258 mp[cp]->product->api_user = product->api_user; 7259 PetscCall(MatProductSetFromOptions(mp[cp])); 7260 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7261 PetscCall(ISGetIndices(glob, &globidx)); 7262 rmapt[cp] = 1; 7263 cmapt[cp] = 2; 7264 cmapa[cp] = globidx; 7265 mptmp[cp] = PETSC_FALSE; 7266 cp++; 7267 } else { /* A_diag * P_diag and A_diag * P_off */ 7268 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7269 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7270 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7271 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7272 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7273 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7274 mp[cp]->product->api_user = product->api_user; 7275 PetscCall(MatProductSetFromOptions(mp[cp])); 7276 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7277 rmapt[cp] = 1; 7278 cmapt[cp] = 1; 7279 mptmp[cp] = PETSC_FALSE; 7280 cp++; 7281 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7282 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7283 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7284 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7285 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7286 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7287 mp[cp]->product->api_user = product->api_user; 7288 PetscCall(MatProductSetFromOptions(mp[cp])); 7289 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7290 rmapt[cp] = 1; 7291 cmapt[cp] = 2; 7292 cmapa[cp] = p->garray; 7293 mptmp[cp] = PETSC_FALSE; 7294 cp++; 7295 } 7296 7297 /* A_off * P_other */ 7298 if (mmdata->P_oth) { 7299 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7300 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7301 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7302 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7303 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7304 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7305 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7306 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7307 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7308 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7309 mp[cp]->product->api_user = product->api_user; 7310 PetscCall(MatProductSetFromOptions(mp[cp])); 7311 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7312 rmapt[cp] = 1; 7313 cmapt[cp] = 2; 7314 cmapa[cp] = P_oth_idx; 7315 mptmp[cp] = PETSC_FALSE; 7316 cp++; 7317 } 7318 break; 7319 7320 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7321 /* A is product->B */ 7322 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7323 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7324 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7325 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7326 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7327 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7328 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7329 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7330 mp[cp]->product->api_user = product->api_user; 7331 PetscCall(MatProductSetFromOptions(mp[cp])); 7332 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7333 PetscCall(ISGetIndices(glob, &globidx)); 7334 rmapt[cp] = 2; 7335 rmapa[cp] = globidx; 7336 cmapt[cp] = 2; 7337 cmapa[cp] = globidx; 7338 mptmp[cp] = PETSC_FALSE; 7339 cp++; 7340 } else { 7341 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7342 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7343 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7344 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7345 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7346 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7347 mp[cp]->product->api_user = product->api_user; 7348 PetscCall(MatProductSetFromOptions(mp[cp])); 7349 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7350 PetscCall(ISGetIndices(glob, &globidx)); 7351 rmapt[cp] = 1; 7352 cmapt[cp] = 2; 7353 cmapa[cp] = globidx; 7354 mptmp[cp] = PETSC_FALSE; 7355 cp++; 7356 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7357 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7358 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7359 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7360 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7361 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7362 mp[cp]->product->api_user = product->api_user; 7363 PetscCall(MatProductSetFromOptions(mp[cp])); 7364 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7365 rmapt[cp] = 2; 7366 rmapa[cp] = p->garray; 
7367 cmapt[cp] = 2; 7368 cmapa[cp] = globidx; 7369 mptmp[cp] = PETSC_FALSE; 7370 cp++; 7371 } 7372 break; 7373 case MATPRODUCT_PtAP: 7374 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7375 /* P is product->B */ 7376 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7377 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7378 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7379 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7380 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7381 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7382 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7383 mp[cp]->product->api_user = product->api_user; 7384 PetscCall(MatProductSetFromOptions(mp[cp])); 7385 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7386 PetscCall(ISGetIndices(glob, &globidx)); 7387 rmapt[cp] = 2; 7388 rmapa[cp] = globidx; 7389 cmapt[cp] = 2; 7390 cmapa[cp] = globidx; 7391 mptmp[cp] = PETSC_FALSE; 7392 cp++; 7393 if (mmdata->P_oth) { 7394 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7395 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7396 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7397 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7398 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7399 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7400 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7401 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7402 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7403 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7404 mp[cp]->product->api_user = product->api_user; 7405 PetscCall(MatProductSetFromOptions(mp[cp])); 7406 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7407 
mptmp[cp] = PETSC_TRUE; 7408 cp++; 7409 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7410 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7411 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7412 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7413 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7414 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7415 mp[cp]->product->api_user = product->api_user; 7416 PetscCall(MatProductSetFromOptions(mp[cp])); 7417 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7418 rmapt[cp] = 2; 7419 rmapa[cp] = globidx; 7420 cmapt[cp] = 2; 7421 cmapa[cp] = P_oth_idx; 7422 mptmp[cp] = PETSC_FALSE; 7423 cp++; 7424 } 7425 break; 7426 default: 7427 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7428 } 7429 /* sanity check */ 7430 if (size > 1) 7431 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7432 7433 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7434 for (i = 0; i < cp; i++) { 7435 mmdata->mp[i] = mp[i]; 7436 mmdata->mptmp[i] = mptmp[i]; 7437 } 7438 mmdata->cp = cp; 7439 C->product->data = mmdata; 7440 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7441 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7442 7443 /* memory type */ 7444 mmdata->mtype = PETSC_MEMTYPE_HOST; 7445 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7446 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7447 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7448 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7449 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7450 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7451 
7452 /* prepare coo coordinates for values insertion */ 7453 7454 /* count total nonzeros of those intermediate seqaij Mats 7455 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7456 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7457 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7458 */ 7459 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7460 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7461 if (mptmp[cp]) continue; 7462 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7463 const PetscInt *rmap = rmapa[cp]; 7464 const PetscInt mr = mp[cp]->rmap->n; 7465 const PetscInt rs = C->rmap->rstart; 7466 const PetscInt re = C->rmap->rend; 7467 const PetscInt *ii = mm->i; 7468 for (i = 0; i < mr; i++) { 7469 const PetscInt gr = rmap[i]; 7470 const PetscInt nz = ii[i + 1] - ii[i]; 7471 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7472 else ncoo_oown += nz; /* this row is local */ 7473 } 7474 } else ncoo_d += mm->nz; 7475 } 7476 7477 /* 7478 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7479 7480 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7481 7482 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7483 7484 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7485 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7486 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7487 7488 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7489 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7490 */ 7491 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7492 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7493 7494 /* gather (i,j) of nonzeros inserted by remote procs */ 7495 if (hasoffproc) { 7496 PetscSF msf; 7497 PetscInt ncoo2, *coo_i2, *coo_j2; 7498 7499 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7500 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7501 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7502 7503 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7504 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7505 PetscInt *idxoff = mmdata->off[cp]; 7506 PetscInt *idxown = mmdata->own[cp]; 7507 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7508 const PetscInt *rmap = rmapa[cp]; 7509 const PetscInt *cmap = cmapa[cp]; 7510 const PetscInt *ii = mm->i; 7511 PetscInt *coi = coo_i + ncoo_o; 7512 PetscInt *coj = coo_j + ncoo_o; 7513 const PetscInt mr = mp[cp]->rmap->n; 7514 const PetscInt rs = C->rmap->rstart; 7515 const PetscInt re = C->rmap->rend; 7516 const PetscInt cs = C->cmap->rstart; 7517 for (i = 0; i < mr; i++) { 7518 const PetscInt *jj = mm->j + ii[i]; 7519 const PetscInt gr = rmap[i]; 7520 const PetscInt nz = ii[i + 1] - ii[i]; 7521 if (gr < rs || gr >= re) { /* this is an offproc row */ 7522 for (j = ii[i]; j < ii[i + 1]; j++) { 7523 *coi++ = gr; 7524 *idxoff++ = j; 7525 } 7526 if (!cmapt[cp]) { /* already global */ 7527 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7528 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7529 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7530 } else { /* offdiag */ 7531 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7532 } 7533 ncoo_o += nz; 7534 } else { /* this is a local row */ 7535 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7536 } 7537 } 7538 } 7539 mmdata->off[cp + 1] = idxoff; 7540 mmdata->own[cp + 1] = idxown; 7541 } 7542 7543 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7544 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7545 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7546 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7547 ncoo = ncoo_d + ncoo_oown + ncoo2; 7548 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7549 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7550 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7551 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7552 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7553 PetscCall(PetscFree2(coo_i, coo_j)); 7554 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7555 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7556 coo_i = coo_i2; 7557 coo_j = coo_j2; 7558 } else { /* no offproc values insertion */ 7559 ncoo = ncoo_d; 7560 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7561 7562 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7563 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7564 PetscCall(PetscSFSetUp(mmdata->sf)); 7565 } 7566 mmdata->hasoffproc = hasoffproc; 7567 7568 /* gather (i,j) of nonzeros inserted locally */ 7569 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7570 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7571 PetscInt *coi = coo_i + ncoo_d; 7572 PetscInt *coj = coo_j + ncoo_d; 7573 const PetscInt *jj = mm->j; 7574 const PetscInt *ii = mm->i; 7575 const PetscInt *cmap = 
cmapa[cp]; 7576 const PetscInt *rmap = rmapa[cp]; 7577 const PetscInt mr = mp[cp]->rmap->n; 7578 const PetscInt rs = C->rmap->rstart; 7579 const PetscInt re = C->rmap->rend; 7580 const PetscInt cs = C->cmap->rstart; 7581 7582 if (mptmp[cp]) continue; 7583 if (rmapt[cp] == 1) { /* consecutive rows */ 7584 /* fill coo_i */ 7585 for (i = 0; i < mr; i++) { 7586 const PetscInt gr = i + rs; 7587 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7588 } 7589 /* fill coo_j */ 7590 if (!cmapt[cp]) { /* type-0, already global */ 7591 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7592 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7593 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7594 } else { /* type-2, local to global for sparse columns */ 7595 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7596 } 7597 ncoo_d += mm->nz; 7598 } else if (rmapt[cp] == 2) { /* sparse rows */ 7599 for (i = 0; i < mr; i++) { 7600 const PetscInt *jj = mm->j + ii[i]; 7601 const PetscInt gr = rmap[i]; 7602 const PetscInt nz = ii[i + 1] - ii[i]; 7603 if (gr >= rs && gr < re) { /* local rows */ 7604 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7605 if (!cmapt[cp]) { /* type-0, already global */ 7606 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7607 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7608 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7609 } else { /* type-2, local to global for sparse columns */ 7610 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7611 } 7612 ncoo_d += nz; 7613 } 7614 } 7615 } 7616 } 7617 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7618 PetscCall(ISDestroy(&glob)); 7619 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7620 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7621 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7622 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7623 7624 /* preallocate with COO data */ 7625 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7626 PetscCall(PetscFree2(coo_i, coo_j)); 7627 PetscFunctionReturn(PETSC_SUCCESS); 7628 } 7629 7630 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7631 { 7632 Mat_Product *product = mat->product; 7633 #if defined(PETSC_HAVE_DEVICE) 7634 PetscBool match = PETSC_FALSE; 7635 PetscBool usecpu = PETSC_FALSE; 7636 #else 7637 PetscBool match = PETSC_TRUE; 7638 #endif 7639 7640 PetscFunctionBegin; 7641 MatCheckProduct(mat, 1); 7642 #if defined(PETSC_HAVE_DEVICE) 7643 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7644 if (match) { /* we can always fallback to the CPU if requested */ 7645 switch (product->type) { 7646 case MATPRODUCT_AB: 7647 if (product->api_user) { 7648 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7649 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7650 PetscOptionsEnd(); 7651 } else { 7652 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7653 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7654 PetscOptionsEnd(); 7655 } 7656 break; 7657 case MATPRODUCT_AtB: 7658 if (product->api_user) { 7659 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7660 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7661 PetscOptionsEnd(); 7662 } else { 7663 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7664 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7665 PetscOptionsEnd(); 7666 } 7667 break; 7668 case MATPRODUCT_PtAP: 7669 if (product->api_user) { 7670 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7671 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7672 PetscOptionsEnd(); 7673 } else { 7674 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7675 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7676 PetscOptionsEnd(); 7677 } 7678 break; 7679 default: 7680 break; 7681 } 7682 match = (PetscBool)!usecpu; 7683 } 7684 #endif 7685 if (match) { 7686 switch (product->type) { 7687 case MATPRODUCT_AB: 7688 case MATPRODUCT_AtB: 7689 case MATPRODUCT_PtAP: 7690 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7691 break; 7692 default: 7693 break; 7694 } 7695 } 7696 /* fallback to MPIAIJ ops */ 7697 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7698 PetscFunctionReturn(PETSC_SUCCESS); 7699 } 7700 7701 /* 7702 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7703 7704 n - the number of block indices in cc[] 7705 cc - the block indices (must be large enough to contain the indices) 7706 */ 7707 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7708 { 7709 PetscInt cnt = -1, nidx, j; 7710 const PetscInt *idx; 7711 7712 PetscFunctionBegin; 7713 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7714 if (nidx) { 7715 cnt = 0; 7716 cc[cnt] = idx[0] / bs; 7717 for (j = 1; j < nidx; j++) { 7718 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7719 } 7720 } 7721 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7722 *n = cnt + 1; 7723 PetscFunctionReturn(PETSC_SUCCESS); 7724 } 7725 7726 /* 7727 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7728 7729 ncollapsed - the number of block indices 7730 collapsed - the block indices (must be large enough to contain the indices) 7731 */ 7732 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7733 { 7734 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7735 7736 PetscFunctionBegin; 7737 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7738 for (i = start + 1; i < start + bs; i++) { 7739 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7740 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7741 cprevtmp = cprev; 7742 cprev = merged; 7743 merged = cprevtmp; 7744 } 7745 *ncollapsed = nprev; 7746 if (collapsed) *collapsed = cprev; 7747 PetscFunctionReturn(PETSC_SUCCESS); 7748 } 7749 7750 /* 7751 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7752 7753 Input Parameter: 7754 . Amat - matrix 7755 - symmetrize - make the result symmetric 7756 + scale - scale with diagonal 7757 7758 Output Parameter: 7759 . 
a_Gmat - output scalar graph >= 0 7760 7761 */ 7762 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7763 { 7764 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7765 MPI_Comm comm; 7766 Mat Gmat; 7767 PetscBool ismpiaij, isseqaij; 7768 Mat a, b, c; 7769 MatType jtype; 7770 7771 PetscFunctionBegin; 7772 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7773 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7774 PetscCall(MatGetSize(Amat, &MM, &NN)); 7775 PetscCall(MatGetBlockSize(Amat, &bs)); 7776 nloc = (Iend - Istart) / bs; 7777 7778 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7779 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7780 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7781 7782 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7783 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7784 implementation */ 7785 if (bs > 1) { 7786 PetscCall(MatGetType(Amat, &jtype)); 7787 PetscCall(MatCreate(comm, &Gmat)); 7788 PetscCall(MatSetType(Gmat, jtype)); 7789 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7790 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7791 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7792 PetscInt *d_nnz, *o_nnz; 7793 MatScalar *aa, val, *AA; 7794 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7795 if (isseqaij) { 7796 a = Amat; 7797 b = NULL; 7798 } else { 7799 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7800 a = d->A; 7801 b = d->B; 7802 } 7803 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7804 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7805 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7806 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7807 const PetscInt *cols1, *cols2; 7808 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7809 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7810 nnz[brow / bs] = nc2 / bs; 7811 if (nc2 % bs) ok = 0; 7812 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7813 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7814 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7815 if (nc1 != nc2) ok = 0; 7816 else { 7817 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7818 if (cols1[jj] != cols2[jj]) ok = 0; 7819 if (cols1[jj] % bs != jj % bs) ok = 0; 7820 } 7821 } 7822 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7823 } 7824 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7825 if (!ok) { 7826 PetscCall(PetscFree2(d_nnz, o_nnz)); 7827 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7828 goto old_bs; 7829 } 7830 } 7831 } 7832 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7833 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7834 PetscCall(PetscFree2(d_nnz, o_nnz)); 7835 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7836 // diag 7837 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7838 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7839 ai = aseq->i; 7840 n = ai[brow + 1] - ai[brow]; 7841 aj = aseq->j + ai[brow]; 7842 for (int k = 0; k < n; k += bs) { // block columns 7843 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7844 val = 0; 7845 if (index_size == 0) { 7846 for (int ii = 0; ii < bs; ii++) { // rows in block 7847 aa = aseq->a + ai[brow + ii] + k; 7848 for (int jj = 0; jj < bs; jj++) { // columns in block 7849 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7850 } 7851 } 7852 } else { // use (index,index) value if provided 7853 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7854 int ii = index[iii]; 7855 aa = aseq->a + ai[brow + ii] + k; 7856 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7857 int jj = index[jjj]; 7858 val += PetscAbs(PetscRealPart(aa[jj])); 7859 } 7860 } 7861 } 7862 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7863 AA[k / bs] = val; 7864 } 7865 grow = Istart / bs + brow / bs; 7866 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7867 } 7868 // off-diag 7869 if (ismpiaij) { 7870 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7871 const PetscScalar *vals; 7872 const PetscInt *cols, *garray = aij->garray; 7873 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7874 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7875 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7876 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7877 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7878 AA[k / bs] = 0; 7879 AJ[cidx] = garray[cols[k]] / bs; 7880 } 7881 nc = ncols / bs; 7882 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7883 if (index_size == 0) { 7884 for (int ii = 0; ii < bs; ii++) { // rows in block 7885 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7886 for (int k = 0; k < ncols; k += bs) { 7887 for (int jj = 0; jj < bs; jj++) { // cols in block 7888 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7889 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7890 } 7891 } 7892 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7893 } 7894 } else { // use (index,index) value if provided 7895 for (int iii = 0; iii < index_size; iii++) { // rows in block 7896 int ii = index[iii]; 7897 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7898 for (int k = 0; k < ncols; k += bs) { 7899 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7900 int jj = index[jjj]; 7901 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7902 } 7903 } 7904 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7905 } 7906 } 7907 grow = Istart / bs + brow / bs; 7908 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7909 } 7910 } 7911 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7912 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7913 PetscCall(PetscFree2(AA, AJ)); 7914 } else { 7915 const PetscScalar *vals; 7916 const PetscInt *idx; 7917 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7918 old_bs: 7919 /* 7920 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7921 */ 7922 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7923 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7924 if (isseqaij) { 7925 PetscInt max_d_nnz; 7926 /* 7927 Determine exact preallocation count for (sequential) scalar matrix 7928 */ 7929 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7930 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7931 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7932 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7933 PetscCall(PetscFree3(w0, w1, w2)); 7934 } else if (ismpiaij) { 7935 Mat Daij, Oaij; 7936 const PetscInt *garray; 7937 PetscInt max_d_nnz; 7938 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7939 /* 7940 Determine exact preallocation count for diagonal block portion of scalar matrix 7941 */ 7942 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7943 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7944 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7945 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7946 PetscCall(PetscFree3(w0, w1, w2)); 7947 /* 7948 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7949 */ 7950 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7951 o_nnz[jj] = 0; 7952 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7953 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7954 o_nnz[jj] += ncols; 7955 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7956 } 7957 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7958 } 7959 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7960 /* get scalar copy (norms) of matrix */ 7961 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7962 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7963 PetscCall(PetscFree2(d_nnz, o_nnz)); 7964 for (Ii = Istart; Ii < Iend; Ii++) { 7965 PetscInt dest_row = Ii / bs; 7966 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7967 for (jj = 0; jj < ncols; jj++) { 7968 PetscInt dest_col = idx[jj] / bs; 7969 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7970 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7971 } 7972 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7973 } 7974 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7975 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7976 } 7977 } else { 7978 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7979 else { 7980 Gmat = Amat; 7981 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7982 } 7983 if (isseqaij) { 7984 a = Gmat; 7985 b = NULL; 7986 } else { 7987 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7988 a = d->A; 7989 b = d->B; 7990 } 7991 if (filter >= 0 || scale) { 7992 /* take absolute value of each entry */ 7993 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7994 MatInfo info; 7995 PetscScalar *avals; 7996 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7997 PetscCall(MatSeqAIJGetArray(c, &avals)); 7998 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  /* Symmetrize G <- G + G^T unless the input matrix is already known symmetric */
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      /* SAME_NONZERO_PATTERN is only safe when the pattern is known structurally symmetric */
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1: G <- D^{-1/2} G D^{-1/2} with D = diag(G) */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  /* drop entries below the filter threshold (filter < 0 disables filtering) */
  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Same idea for SETERRQ: record the error in *_ierr and return from the void function */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol name to the Fortran-mangled name for this compiler's convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
  /* trailing-underscore convention: name already matches */
#endif
/*
   matsetvaluesmpiaij_ - MatSetValues for MPIAIJ matrices, callable directly from Fortran.

   All scalar arguments arrive as pointers (Fortran pass-by-reference); errors are reported
   through *_ierr via the redefined PetscCall/SETERRQ macros above rather than by return value.
   The body inlines the insertion logic using the MatSetValues_SeqAIJ_A_Private / _B_Private
   macros, which read the surrounding local variables (rp1, ap1, low1, ... / rp2, ap2, low2, ...).
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch state consumed by the MatSetValues_SeqAIJ_{A,B}_Private macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index means "skip this row" */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for both the diagonal (A) and off-diagonal (B) blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* never drop a diagonal entry, even if zero, so factorizations keep their structure */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal block: translate the global column to B's compressed local numbering */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              /* colmap stores local+1 so that the hash-map default 0 maps to "not present" (col = -1) */
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column: disassemble back to unassembled form, which rebuilds B */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ