1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
  MatBindToCPU_MPIAIJ - Binds (or unbinds) the matrix and its pieces to the CPU.

  Propagates the flag to the diagonal (A) and off-diagonal (B) sequential blocks
  and to the internal work vectors.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetBlockSizes_MPIAIJ - Forwards block sizes to the sequential blocks.
  The off-diagonal block B always keeps column block size 1.
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindNonzeroRows_MPIAIJ - Builds an index set of the locally owned rows that
  contain at least one nonzero value (in either the diagonal or off-diagonal block).

  Output keptrows is NULL on every rank when no rank has a zero row; otherwise it is a
  parallel ISGeneral of global row indices. cnt counts the zero rows of this rank;
  the Allreduce sums them so all ranks agree on whether an IS is needed.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows that are structurally empty or hold only explicit zeros */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) { /* no zero rows anywhere: leave *keptrows NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: collect the global indices of rows with a nonzero value */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalSet_MPIAIJ - Sets/adds values on the diagonal of Y.

  Fast path: when Y is assembled and its row and column layouts are congruent,
  the whole diagonal lives in the local diagonal block, so the operation is
  forwarded to aij->A. Otherwise fall back to the default implementation.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindZeroDiagonals_MPIAIJ - Returns an IS (global numbering) of the locally
  owned rows whose diagonal entry is zero or missing; the search is delegated to
  the sequential diagonal block and the results shifted by the ownership offset.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* local -> global row numbers */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (1/2/inf norm,
  sum or mean of real/imaginary parts) over the whole parallel matrix.

  Local contributions are accumulated into a length-N work array indexed by the
  global column (diagonal block entries are shifted by cmap->rstart; off-diagonal
  entries are mapped through garray), then combined with an Allreduce (MAX for the
  infinity norm, SUM otherwise). reductions must have length N on every rank.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): the get/restore pairs with the unused 'dummy' appear intended to
     force the values onto the host before the raw a_aij->a/b_aij->a accesses below
     -- confirm against the device backends */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* turn sums into means */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFindOffBlockDiagonalEntries_MPIAIJ - Returns an IS (global numbering) of the
  locally owned rows that have entries outside the block diagonal: the union of the
  diagonal block's own off-block-diagonal rows and every row with an off-diagonal
  (B) entry, sorted with duplicates removed.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both index lists, then sort and deduplicate */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local -> global row numbers */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.

  Entries are stored 1-based (value i+1 for local column i) so that 0 / "not found"
  can mean "column not present in B".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues_SeqAIJ_A_Private - Inserts/adds one value into the diagonal block A.

  Relies on ambient variables set up by the caller (rp1/ap1 row pointers, low1/high1
  binary-search window, nrow1/rmax1/lastcol1, nonew, ignorezeroentries, a, am, ...).
  Binary-searches the row for `col`; on a hit it adds or overwrites, otherwise it
  (subject to the nonew policy) reallocates if needed and shifts later entries up
  to make room. orow/ocol are the original global indices, used only for errors.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
  MatSetValues_SeqAIJ_B_Private - Same as the A variant above but for the
  off-diagonal block B, using the rp2/ap2/low2/high2/... ambient variables.
  Note: unlike A, zeros are skipped regardless of row == col, since B holds
  no diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  MatSetValuesRow_MPIAIJ - Overwrites an entire locally owned row with the values v,
  which are given in global column order: entries left of the diagonal block, then
  the diagonal-block entries, then entries right of the diagonal block. No new
  nonzero locations are created; only existing storage is written.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* l = number of B entries with global column < diag */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values
  (m rows im[] by n columns in[]) into the parallel matrix.

  Locally owned rows are routed to the diagonal block (columns in [cstart,cend))
  or the off-diagonal block (all other columns, translated through colmap once the
  matrix has been assembled; a brand-new off-diagonal column may trigger
  MatDisAssemble_MPIAIJ, after which all B-related ambient variables are re-seated).
  Rows owned by other ranks are placed in the stash for communication during
  assembly. Negative row/column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lives in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz; /* running write positions into aj / bj */
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* store local column index */
        dnz++;
      } else { /* off-diagonal entries keep their global column index here */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
665 */ 666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 667 { 668 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 669 Mat A = aij->A; /* diagonal part of the matrix */ 670 Mat B = aij->B; /* off-diagonal part of the matrix */ 671 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 672 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 673 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 674 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 675 PetscInt *ailen = a->ilen, *aj = a->j; 676 PetscInt *bilen = b->ilen, *bj = b->j; 677 PetscInt am = aij->A->rmap->n, j; 678 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 679 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 680 PetscScalar *aa = a->a, *ba = b->a; 681 682 PetscFunctionBegin; 683 /* Iterate over all rows of the matrix */ 684 for (j = 0; j < am; j++) { 685 dnz_row = onz_row = 0; 686 rowstart_offd = full_offd_i[j]; 687 rowstart_diag = full_diag_i[j]; 688 /* Iterate over all non-zero columns of the current row */ 689 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 690 /* If column is in the diagonal */ 691 if (mat_j[col] >= cstart && mat_j[col] < cend) { 692 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 693 aa[rowstart_diag + dnz_row] = mat_a[col]; 694 dnz_row++; 695 } else { /* off-diagonal entries */ 696 bj[rowstart_offd + onz_row] = mat_j[col]; 697 ba[rowstart_offd + onz_row] = mat_a[col]; 698 onz_row++; 699 } 700 } 701 ailen[j] = dnz_row; 702 bilen[j] = onz_row; 703 } 704 PetscFunctionReturn(PETSC_SUCCESS); 705 } 706 707 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 708 { 709 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 710 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 711 
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 712 713 PetscFunctionBegin; 714 for (i = 0; i < m; i++) { 715 if (idxm[i] < 0) continue; /* negative row */ 716 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 717 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 718 row = idxm[i] - rstart; 719 for (j = 0; j < n; j++) { 720 if (idxn[j] < 0) continue; /* negative column */ 721 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 722 if (idxn[j] >= cstart && idxn[j] < cend) { 723 col = idxn[j] - cstart; 724 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 725 } else { 726 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 727 #if defined(PETSC_USE_CTABLE) 728 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 729 col--; 730 #else 731 col = aij->colmap[idxn[j]] - 1; 732 #endif 733 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 734 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 735 } 736 } 737 } 738 PetscFunctionReturn(PETSC_SUCCESS); 739 } 740 741 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 744 PetscInt nstash, reallocs; 745 746 PetscFunctionBegin; 747 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 748 749 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 750 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 751 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* End assembly: drain stashed off-process entries into the local blocks, assemble
   the diagonal (A) and off-diagonal (B) sequential blocks, handle disassembly when
   the nonzero structure changed on some rank, and update the collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* scratch space used by MatGetRow(); invalid after a new assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag)); /* cached diagonal is stale now */

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all stored entries of both the diagonal and off-diagonal blocks (pattern kept). */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const
PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew; /* save nonew flags; restored after the diagonal insertions */
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(mat->A->data))->keepnonzeropattern || !((Mat_SeqAIJ *)(mat->A->data))->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given global rows AND the matching columns, optionally placing diag on the
   diagonal and fixing up b from x for the eliminated variables. The row list may name
   rows owned by any rank; an SF reduction communicates the flags to the owners. */
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns
this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1; /* 1 marks an eliminated column */
  PetscCall(VecRestoreArray(xmask, &bb));
  /* scatter the column mask to the ghost layout so every rank knows which of its
     off-diagonal columns were eliminated */
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move the known value to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A*xx. Overlaps communication of ghost values with the diagonal-block multiply:
   scatter-begin, local A*xx, scatter-end, then add B*lvec. */
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ")
and xx (%" PetscInt_FMT ")", A->cmap->n, nt));
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy); /* local work overlaps the communication */
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* xx = (diagonal block of A) * bb; forwards to the sequential diagonal block. */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A*xx, with the same communication/computation overlap as MatMult. */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T * xx: local transposes first, then a reverse scatter ADDs the off-process
   contributions into yy. */
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Test whether Bmat equals Amat^T (to tolerance tol); collective result in *f.
   First the cheap diagonal-block test on every rank; only if that passes are the
   off-diagonal parts gathered via MatCreateSubMatrices for the expensive test. */
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A is symmetric iff A equals A^T. */
static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T * xx; same structure as MatMultTranspose with an add. */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
PetscFunctionBegin; 1138 /* do nondiagonal part */ 1139 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1140 /* do local part */ 1141 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1142 /* add partial results together */ 1143 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1144 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1145 PetscFunctionReturn(PETSC_SUCCESS); 1146 } 1147 1148 /* 1149 This only works correctly for square matrices where the subblock A->A is the 1150 diagonal block 1151 */ 1152 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1153 { 1154 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1155 1156 PetscFunctionBegin; 1157 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1158 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1159 PetscCall(MatGetDiagonal(a->A, v)); 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 } 1162 1163 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1164 { 1165 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1166 1167 PetscFunctionBegin; 1168 PetscCall(MatScale(a->A, aa)); 1169 PetscCall(MatScale(a->B, aa)); 1170 PetscFunctionReturn(PETSC_SUCCESS); 1171 } 1172 1173 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1174 { 1175 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1176 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1177 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1178 const PetscInt *garray = aij->garray; 1179 const PetscScalar *aa, *ba; 1180 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1181 PetscInt64 nz, hnz; 1182 PetscInt *rowlens; 1183 PetscInt *colidxs; 1184 PetscScalar *matvals; 1185 PetscMPIInt rank; 1186 1187 PetscFunctionBegin; 1188 PetscCall(PetscViewerSetUp(viewer)); 1189 1190 M = mat->rmap->N; 1191 
N = mat->cmap->N; 1192 m = mat->rmap->n; 1193 rs = mat->rmap->rstart; 1194 cs = mat->cmap->rstart; 1195 nz = A->nz + B->nz; 1196 1197 /* write matrix header */ 1198 header[0] = MAT_FILE_CLASSID; 1199 header[1] = M; 1200 header[2] = N; 1201 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1202 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1203 if (rank == 0) { 1204 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1205 else header[3] = (PetscInt)hnz; 1206 } 1207 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1208 1209 /* fill in and store row lengths */ 1210 PetscCall(PetscMalloc1(m, &rowlens)); 1211 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1212 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1213 PetscCall(PetscFree(rowlens)); 1214 1215 /* fill in and store column indices */ 1216 PetscCall(PetscMalloc1(nz, &colidxs)); 1217 for (cnt = 0, i = 0; i < m; i++) { 1218 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1219 if (garray[B->j[jb]] > cs) break; 1220 colidxs[cnt++] = garray[B->j[jb]]; 1221 } 1222 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1223 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1224 } 1225 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1226 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1227 PetscCall(PetscFree(colidxs)); 1228 1229 /* fill in and store nonzero values */ 1230 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1231 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1232 PetscCall(PetscMalloc1(nz, &matvals)); 1233 for (cnt = 0, i = 0; i < m; i++) { 1234 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1235 if (garray[B->j[jb]] > cs) break; 1236 matvals[cnt++] = ba[jb]; 1237 } 1238 for (ja = 
A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1239 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1240 } 1241 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1242 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1243 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1244 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1245 PetscCall(PetscFree(matvals)); 1246 1247 /* write block size option to the viewer's .info file */ 1248 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1249 PetscFunctionReturn(PETSC_SUCCESS); 1250 } 1251 1252 #include <petscdraw.h> 1253 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1256 PetscMPIInt rank = aij->rank, size = aij->size; 1257 PetscBool isdraw, iascii, isbinary; 1258 PetscViewer sviewer; 1259 PetscViewerFormat format; 1260 1261 PetscFunctionBegin; 1262 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1263 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1264 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1265 if (iascii) { 1266 PetscCall(PetscViewerGetFormat(viewer, &format)); 1267 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1268 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1269 PetscCall(PetscMalloc1(size, &nz)); 1270 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1271 for (i = 0; i < (PetscInt)size; i++) { 1272 nmax = PetscMax(nmax, nz[i]); 1273 nmin = PetscMin(nmin, nz[i]); 1274 navg += nz[i]; 1275 } 1276 PetscCall(PetscFree(nz)); 1277 navg = navg / size; 1278 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance 
- Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics, printed in rank order */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A    = AA[0];
        Av   = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Public MatView entry point: forward supported viewer types to the worker above. */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel (block Jacobi style) SOR: each outer iteration scatters the current xx to
   the ghost layout, moves the off-diagonal contribution to the rhs (bb1 = bb - B*x),
   and runs the requested local sweep on the diagonal block; Eisenstat variant included. */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;
  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is only needed when more than one outer iteration (or a nonzero initial
     guess, or Eisenstat) requires the modified rhs bb - B*x */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily build and cache the diagonal used by the Eisenstat trick */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Produce *B = P_r A P_c for row/column permutations rowp/colp. Inverts the
   permutations with star forests to learn the destination of every local row,
   column, and ghost column, counts the permuted diagonal/off-diagonal nonzeros,
   preallocates the result, and inserts the permuted entries. */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa,
*ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count permuted diagonal/off-diagonal nonzeros per source row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the ranks that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the number of ghost (off-process) columns and, optionally, their global
   indices (garray) as used by the off-diagonal block. */
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather MatInfo statistics by summing the local diagonal- and off-diagonal-block
   info (continues past this chunk of the file). */
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] +=
info->nz_allocated; 1624 isend[2] += info->nz_unneeded; 1625 isend[3] += info->memory; 1626 isend[4] += info->mallocs; 1627 if (flag == MAT_LOCAL) { 1628 info->nz_used = isend[0]; 1629 info->nz_allocated = isend[1]; 1630 info->nz_unneeded = isend[2]; 1631 info->memory = isend[3]; 1632 info->mallocs = isend[4]; 1633 } else if (flag == MAT_GLOBAL_MAX) { 1634 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } else if (flag == MAT_GLOBAL_SUM) { 1642 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1643 1644 info->nz_used = irecv[0]; 1645 info->nz_allocated = irecv[1]; 1646 info->nz_unneeded = irecv[2]; 1647 info->memory = irecv[3]; 1648 info->mallocs = irecv[4]; 1649 } 1650 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1651 info->fill_ratio_needed = 0; 1652 info->factor_mallocs = 0; 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1657 { 1658 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1659 1660 PetscFunctionBegin; 1661 switch (op) { 1662 case MAT_NEW_NONZERO_LOCATIONS: 1663 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1664 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1665 case MAT_KEEP_NONZERO_PATTERN: 1666 case MAT_NEW_NONZERO_LOCATION_ERR: 1667 case MAT_USE_INODES: 1668 case MAT_IGNORE_ZERO_ENTRIES: 1669 case MAT_FORM_EXPLICIT_TRANSPOSE: 1670 MatCheckPreallocated(A, 1); 1671 PetscCall(MatSetOption(a->A, op, flg)); 1672 PetscCall(MatSetOption(a->B, op, flg)); 1673 break; 1674 case MAT_ROW_ORIENTED: 1675 MatCheckPreallocated(A, 1); 1676 a->roworiented = flg; 1677 1678 PetscCall(MatSetOption(a->A, op, flg)); 1679 PetscCall(MatSetOption(a->B, op, flg)); 1680 break; 1681 case MAT_FORCE_DIAGONAL_ENTRIES: 
1682 case MAT_SORTED_FULL: 1683 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1684 break; 1685 case MAT_IGNORE_OFF_PROC_ENTRIES: 1686 a->donotstash = flg; 1687 break; 1688 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1689 case MAT_SPD: 1690 case MAT_SYMMETRIC: 1691 case MAT_STRUCTURALLY_SYMMETRIC: 1692 case MAT_HERMITIAN: 1693 case MAT_SYMMETRY_ETERNAL: 1694 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1695 case MAT_SPD_ETERNAL: 1696 /* if the diagonal matrix is square it inherits some of the properties above */ 1697 break; 1698 case MAT_SUBMAT_SINGLEIS: 1699 A->submat_singleis = flg; 1700 break; 1701 case MAT_STRUCTURE_ONLY: 1702 /* The option is handled directly by MatSetOption() */ 1703 break; 1704 default: 1705 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1706 } 1707 PetscFunctionReturn(PETSC_SUCCESS); 1708 } 1709 1710 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1711 { 1712 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1713 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1714 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1715 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1716 PetscInt *cmap, *idx_p; 1717 1718 PetscFunctionBegin; 1719 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1720 mat->getrowactive = PETSC_TRUE; 1721 1722 if (!mat->rowvalues && (idx || v)) { 1723 /* 1724 allocate enough space to hold information from the longest row. 
1725 */ 1726 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1727 PetscInt max = 1, tmp; 1728 for (i = 0; i < matin->rmap->n; i++) { 1729 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1730 if (max < tmp) max = tmp; 1731 } 1732 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1733 } 1734 1735 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1736 lrow = row - rstart; 1737 1738 pvA = &vworkA; 1739 pcA = &cworkA; 1740 pvB = &vworkB; 1741 pcB = &cworkB; 1742 if (!v) { 1743 pvA = NULL; 1744 pvB = NULL; 1745 } 1746 if (!idx) { 1747 pcA = NULL; 1748 if (!v) pcB = NULL; 1749 } 1750 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1751 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1752 nztot = nzA + nzB; 1753 1754 cmap = mat->garray; 1755 if (v || idx) { 1756 if (nztot) { 1757 /* Sort by increasing column numbers, assuming A and B already sorted */ 1758 PetscInt imark = -1; 1759 if (v) { 1760 *v = v_p = mat->rowvalues; 1761 for (i = 0; i < nzB; i++) { 1762 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1763 else break; 1764 } 1765 imark = i; 1766 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1767 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1768 } 1769 if (idx) { 1770 *idx = idx_p = mat->rowindices; 1771 if (imark > -1) { 1772 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1773 } else { 1774 for (i = 0; i < nzB; i++) { 1775 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1776 else break; 1777 } 1778 imark = i; 1779 } 1780 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1781 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1782 } 1783 } else { 1784 if (idx) *idx = NULL; 1785 if (v) *v = NULL; 1786 } 1787 } 1788 *nz = nztot; 1789 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1790 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1795 { 1796 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1797 1798 PetscFunctionBegin; 1799 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1800 aij->getrowactive = PETSC_FALSE; 1801 PetscFunctionReturn(PETSC_SUCCESS); 1802 } 1803 1804 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1805 { 1806 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1807 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1808 PetscInt i, j, cstart = mat->cmap->rstart; 1809 PetscReal sum = 0.0; 1810 const MatScalar *v, *amata, *bmata; 1811 1812 PetscFunctionBegin; 1813 if (aij->size == 1) { 1814 PetscCall(MatNorm(aij->A, type, norm)); 1815 } else { 1816 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1817 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1818 if (type == NORM_FROBENIUS) { 1819 v = amata; 1820 for (i = 0; i < amat->nz; i++) { 1821 sum += PetscRealPart(PetscConj(*v) * (*v)); 1822 v++; 1823 } 1824 v = bmata; 1825 for (i = 0; i < bmat->nz; i++) { 1826 sum += PetscRealPart(PetscConj(*v) * (*v)); 1827 v++; 1828 } 1829 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1830 *norm = PetscSqrtReal(*norm); 1831 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1832 } else if (type == NORM_1) { /* max column norm */ 1833 PetscReal *tmp, *tmp2; 1834 PetscInt *jj, *garray = aij->garray; 1835 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1836 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1837 *norm = 0.0; 1838 v = amata; 1839 jj = amat->j; 1840 for (j = 0; j < amat->nz; j++) { 1841 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1842 v++; 1843 } 1844 v = bmata; 1845 jj = bmat->j; 1846 for (j = 0; j < bmat->nz; j++) { 1847 
tmp[garray[*jj++]] += PetscAbsScalar(*v); 1848 v++; 1849 } 1850 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1851 for (j = 0; j < mat->cmap->N; j++) { 1852 if (tmp2[j] > *norm) *norm = tmp2[j]; 1853 } 1854 PetscCall(PetscFree(tmp)); 1855 PetscCall(PetscFree(tmp2)); 1856 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1857 } else if (type == NORM_INFINITY) { /* max row norm */ 1858 PetscReal ntemp = 0.0; 1859 for (j = 0; j < aij->A->rmap->n; j++) { 1860 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1861 sum = 0.0; 1862 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1863 sum += PetscAbsScalar(*v); 1864 v++; 1865 } 1866 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1867 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1868 sum += PetscAbsScalar(*v); 1869 v++; 1870 } 1871 if (sum > ntemp) ntemp = sum; 1872 } 1873 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1874 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1875 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1876 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1878 } 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 1882 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1883 { 1884 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1885 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1886 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1887 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1888 Mat B, A_diag, *B_diag; 1889 const MatScalar *pbv, *bv; 1890 1891 PetscFunctionBegin; 1892 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1893 ma = A->rmap->n; 1894 na = 
A->cmap->n; 1895 mb = a->B->rmap->n; 1896 nb = a->B->cmap->n; 1897 ai = Aloc->i; 1898 aj = Aloc->j; 1899 bi = Bloc->i; 1900 bj = Bloc->j; 1901 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1902 PetscInt *d_nnz, *g_nnz, *o_nnz; 1903 PetscSFNode *oloc; 1904 PETSC_UNUSED PetscSF sf; 1905 1906 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1907 /* compute d_nnz for preallocation */ 1908 PetscCall(PetscArrayzero(d_nnz, na)); 1909 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1910 /* compute local off-diagonal contributions */ 1911 PetscCall(PetscArrayzero(g_nnz, nb)); 1912 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1913 /* map those to global */ 1914 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1915 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1916 PetscCall(PetscSFSetFromOptions(sf)); 1917 PetscCall(PetscArrayzero(o_nnz, na)); 1918 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1919 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFDestroy(&sf)); 1921 1922 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1923 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1924 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1925 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1926 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1927 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1928 } else { 1929 B = *matout; 1930 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1931 } 1932 1933 b = (Mat_MPIAIJ *)B->data; 1934 A_diag = a->A; 1935 B_diag = &b->A; 1936 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1937 A_diag_ncol = A_diag->cmap->N; 1938 B_diag_ilen = sub_B_diag->ilen; 1939 B_diag_i = sub_B_diag->i; 1940 1941 /* Set ilen for diagonal of B */ 1942 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1943 1944 /* 
Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1945 very quickly (=without using MatSetValues), because all writes are local. */ 1946 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1947 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1948 1949 /* copy over the B part */ 1950 PetscCall(PetscMalloc1(bi[mb], &cols)); 1951 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1952 pbv = bv; 1953 row = A->rmap->rstart; 1954 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1955 cols_tmp = cols; 1956 for (i = 0; i < mb; i++) { 1957 ncol = bi[i + 1] - bi[i]; 1958 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1959 row++; 1960 if (pbv) pbv += ncol; 1961 if (cols_tmp) cols_tmp += ncol; 1962 } 1963 PetscCall(PetscFree(cols)); 1964 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1965 1966 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1967 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1968 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1969 *matout = B; 1970 } else { 1971 PetscCall(MatHeaderMerge(A, &B)); 1972 } 1973 PetscFunctionReturn(PETSC_SUCCESS); 1974 } 1975 1976 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1977 { 1978 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1979 Mat a = aij->A, b = aij->B; 1980 PetscInt s1, s2, s3; 1981 1982 PetscFunctionBegin; 1983 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1984 if (rr) { 1985 PetscCall(VecGetLocalSize(rr, &s1)); 1986 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1987 /* Overlap communication with computation. 
*/ 1988 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1989 } 1990 if (ll) { 1991 PetscCall(VecGetLocalSize(ll, &s1)); 1992 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1993 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1994 } 1995 /* scale the diagonal block */ 1996 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1997 1998 if (rr) { 1999 /* Do a scatter end and then right scale the off-diagonal block */ 2000 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2001 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2002 } 2003 PetscFunctionReturn(PETSC_SUCCESS); 2004 } 2005 2006 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2007 { 2008 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2009 2010 PetscFunctionBegin; 2011 PetscCall(MatSetUnfactored(a->A)); 2012 PetscFunctionReturn(PETSC_SUCCESS); 2013 } 2014 2015 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2016 { 2017 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2018 Mat a, b, c, d; 2019 PetscBool flg; 2020 2021 PetscFunctionBegin; 2022 a = matA->A; 2023 b = matA->B; 2024 c = matB->A; 2025 d = matB->B; 2026 2027 PetscCall(MatEqual(a, c, &flg)); 2028 if (flg) PetscCall(MatEqual(b, d, &flg)); 2029 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2030 PetscFunctionReturn(PETSC_SUCCESS); 2031 } 2032 2033 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2034 { 2035 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2036 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2037 2038 PetscFunctionBegin; 2039 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2040 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2041 /* because of the column compression in the off-processor part of the matrix a->B, 2042 the number of columns in a->B and b->B may be different, hence we cannot call 2043 the MatCopy() directly on the two parts. If need be, we can provide a more 2044 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2045 then copying the submatrices */ 2046 PetscCall(MatCopy_Basic(A, B, str)); 2047 } else { 2048 PetscCall(MatCopy(a->A, b->A, str)); 2049 PetscCall(MatCopy(a->B, b->B, str)); 2050 } 2051 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 /* 2056 Computes the number of nonzeros per row needed for preallocation when X and Y 2057 have different nonzero structure. 2058 */ 2059 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2060 { 2061 PetscInt i, j, k, nzx, nzy; 2062 2063 PetscFunctionBegin; 2064 /* Set the number of nonzeros in the new matrix */ 2065 for (i = 0; i < m; i++) { 2066 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2067 nzx = xi[i + 1] - xi[i]; 2068 nzy = yi[i + 1] - yi[i]; 2069 nnz[i] = 0; 2070 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2071 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2072 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2073 nnz[i]++; 2074 } 2075 for (; k < nzy; k++) nnz[i]++; 2076 } 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2081 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2082 { 
2083 PetscInt m = Y->rmap->N; 2084 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2085 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2086 2087 PetscFunctionBegin; 2088 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2089 PetscFunctionReturn(PETSC_SUCCESS); 2090 } 2091 2092 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2093 { 2094 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2095 2096 PetscFunctionBegin; 2097 if (str == SAME_NONZERO_PATTERN) { 2098 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2099 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2100 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2101 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2102 } else { 2103 Mat B; 2104 PetscInt *nnz_d, *nnz_o; 2105 2106 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2107 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2108 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2109 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2110 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2111 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2112 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2113 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2114 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2115 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2116 PetscCall(MatHeaderMerge(Y, &B)); 2117 PetscCall(PetscFree(nnz_d)); 2118 PetscCall(PetscFree(nnz_o)); 2119 } 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2124 2125 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2126 { 2127 PetscFunctionBegin; 2128 if (PetscDefined(USE_COMPLEX)) { 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2130 2131 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2132 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2133 } 2134 
PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 2141 PetscFunctionBegin; 2142 PetscCall(MatRealPart(a->A)); 2143 PetscCall(MatRealPart(a->B)); 2144 PetscFunctionReturn(PETSC_SUCCESS); 2145 } 2146 2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 2151 PetscFunctionBegin; 2152 PetscCall(MatImaginaryPart(a->A)); 2153 PetscCall(MatImaginaryPart(a->B)); 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2158 { 2159 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2160 PetscInt i, *idxb = NULL, m = A->rmap->n; 2161 PetscScalar *va, *vv; 2162 Vec vB, vA; 2163 const PetscScalar *vb; 2164 2165 PetscFunctionBegin; 2166 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2167 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2168 2169 PetscCall(VecGetArrayWrite(vA, &va)); 2170 if (idx) { 2171 for (i = 0; i < m; i++) { 2172 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2173 } 2174 } 2175 2176 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2177 PetscCall(PetscMalloc1(m, &idxb)); 2178 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2179 2180 PetscCall(VecGetArrayWrite(v, &vv)); 2181 PetscCall(VecGetArrayRead(vB, &vb)); 2182 for (i = 0; i < m; i++) { 2183 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2184 vv[i] = vb[i]; 2185 if (idx) idx[i] = a->garray[idxb[i]]; 2186 } else { 2187 vv[i] = va[i]; 2188 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2189 } 2190 } 2191 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2192 PetscCall(VecRestoreArrayWrite(vA, &va)); 2193 PetscCall(VecRestoreArrayRead(vB, &vb)); 2194 PetscCall(PetscFree(idxb)); 2195 PetscCall(VecDestroy(&vA)); 2196 PetscCall(VecDestroy(&vB)); 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 
2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2201 { 2202 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2203 PetscInt m = A->rmap->n; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v, &diagA)); 2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v, &a)); 2244 for (r = 0; r < m; r++) { 2245 a[r] = 0.0; 2246 if (idx) idx[r] = -1; 2247 } 2248 PetscCall(VecRestoreArrayWrite(v, &a)); 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 
PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = 
offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise minimum of an MPIAIJ matrix: combines per-row minima of the local diagonal
   block (mat->A) and the local off-diagonal block (mat->B), accounting for the implicit
   zeros of rows that are not dense in B. On output v[r] holds the minimum of row r and,
   if idx is non-NULL, idx[r] holds a global column index where that minimum occurs. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps local B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; reuse v's storage as the work vector */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: rows are all implicit, so min is the identity for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the hole test compares the global column `col` against the loop
         index `j` (shifted by n past cstart); presumably this relies on cmap[] being
         sorted ascending so a gap shows as col exceeding its expected position — confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan explicit entries of B's row r, keeping the smallest value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block results; ties resolved to the
     smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise maximum of an MPIAIJ matrix; mirror image of MatGetRowMin_MPIAIJ with all
   comparisons reversed (PETSC_MIN_REAL identity, implicit zeros bound the max from below). */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps local B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: identity element for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan explicit entries of B's row r, keeping the largest value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return a sequential matrix with the nonzero structure (values not copied) gathered
   from the parallel matrix; takes ownership of the single Mat produced by
   MatCreateSubMatrix_MPIAIJ_All and frees its array wrapper. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegate block-diagonal inversion to the local diagonal block and propagate any
   factorization error state back to the parallel matrix. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill both local blocks with random values. For an unassembled matrix the diagonal
   column range is skipped in B (those columns belong to A); the matrix is reassembled
   afterwards so the result is valid. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Swap the increase-overlap implementation between the scalable and default variants. */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else
A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* local nonzeros = nnz of the diagonal block plus nnz of the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process MPIAIJ-specific runtime options (currently just the scalable-overlap toggle). */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I. Ensures the diagonal block has room for at least one entry per row
   before delegating to MatShift_Basic. */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the no-new-nonzero flag clobbered by preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any diagonal entry is structurally missing; d (if requested) is
   converted from the local row number to the global one. */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* local -> global row index */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Variable-block diagonal inversion happens entirely within the local diagonal block. */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop stored zeros from both local blocks. */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Positional virtual-function table for MATMPIAIJ; the /*NN*/ comments are slot
   indices in struct _MatOps and the ordering must not change. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};

/* Stash the current numerical values of both local blocks (pairs with MatRetrieveValues). */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously saved with MatStoreValues on both local blocks. */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ
*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate the two local SeqAIJ blocks of an MPIAIJ matrix: d_nz/d_nnz size the
   diagonal block A, o_nz/o_nnz the off-diagonal block B. Any previously built
   communication structures (colmap, garray, lvec, Mvctx) are destroyed because they
   depend on the old nonzero pattern. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leaving hash-based MatSetValues mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on one rank there is no off-diagonal part, so B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset both local blocks to their preallocated (empty) state, discarding the
   pattern-dependent communication structures so they are rebuilt on assembly. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIAIJ matrix: copies layouts, flags, colmap/garray, the scatter
   context, and duplicates both local blocks (values copied per cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix MatGetRow scratch is not shared */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIAIJ matrix from a viewer; dispatches to the binary or HDF5 reader. */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read an MPIAIJ matrix from a PETSc binary viewer: header, per-row counts
   (converted to a CSR row-offset array), then column indices and values, finally
   handed to MatMPIAIJSetPreallocationCSR. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_MAX_INT) { /* PETSC_MAX_INT in the header means "count unknown"; skip the consistency check */
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree that their stride matches their column ownership range */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
.
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3148 i.e., mat->rstart <= isrow[i] < mat->rend 3149 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3150 i.e., mat->cstart <= iscol[i] < mat->cend 3151 3152 Output Parameters: 3153 + isrow_d - sequential row index set for retrieving mat->A 3154 . iscol_d - sequential column index set for retrieving mat->A 3155 . iscol_o - sequential column index set for retrieving mat->B 3156 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3157 */ 3158 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3159 { 3160 Vec x, cmap; 3161 const PetscInt *is_idx; 3162 PetscScalar *xarray, *cmaparray; 3163 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3164 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3165 Mat B = a->B; 3166 Vec lvec = a->lvec, lcmap; 3167 PetscInt i, cstart, cend, Bn = B->cmap->N; 3168 MPI_Comm comm; 3169 VecScatter Mvctx = a->Mvctx; 3170 3171 PetscFunctionBegin; 3172 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3173 PetscCall(ISGetLocalSize(iscol, &ncols)); 3174 3175 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3176 PetscCall(MatCreateVecs(mat, &x, NULL)); 3177 PetscCall(VecSet(x, -1.0)); 3178 PetscCall(VecDuplicate(x, &cmap)); 3179 PetscCall(VecSet(cmap, -1.0)); 3180 3181 /* Get start indices */ 3182 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3183 isstart -= ncols; 3184 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3185 3186 PetscCall(ISGetIndices(iscol, &is_idx)); 3187 PetscCall(VecGetArray(x, &xarray)); 3188 PetscCall(VecGetArray(cmap, &cmaparray)); 3189 PetscCall(PetscMalloc1(ncols, &idx)); 3190 for (i = 0; i < ncols; i++) { 3191 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3192 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3193 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3194 } 3195 PetscCall(VecRestoreArray(x, &xarray)); 3196 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3197 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3198 3199 /* Get iscol_d */ 3200 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3201 PetscCall(ISGetBlockSize(iscol, &i)); 3202 PetscCall(ISSetBlockSize(*iscol_d, i)); 3203 3204 /* Get isrow_d */ 3205 PetscCall(ISGetLocalSize(isrow, &m)); 3206 rstart = mat->rmap->rstart; 3207 PetscCall(PetscMalloc1(m, &idx)); 3208 PetscCall(ISGetIndices(isrow, &is_idx)); 3209 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3210 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3211 3212 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3213 PetscCall(ISGetBlockSize(isrow, &i)); 3214 PetscCall(ISSetBlockSize(*isrow_d, i)); 3215 3216 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3217 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3218 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3219 3220 PetscCall(VecDuplicate(lvec, &lcmap)); 3221 3222 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3223 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3224 3225 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3226 /* off-process column indices */ 3227 count = 0; 3228 PetscCall(PetscMalloc1(Bn, &idx)); 3229 PetscCall(PetscMalloc1(Bn, &cmap1)); 3230 3231 PetscCall(VecGetArray(lvec, &xarray)); 3232 PetscCall(VecGetArray(lcmap, &cmaparray)); 3233 for (i = 0; i < Bn; i++) { 3234 if (PetscRealPart(xarray[i]) > -1.0) { 3235 idx[count] = i; /* local column index in off-diagonal part B */ 3236 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3237 count++; 3238 } 3239 } 3240 PetscCall(VecRestoreArray(lvec, &xarray)); 3241 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3242 3243 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3244 /* cannot ensure iscol_o has same blocksize as iscol! */ 3245 3246 PetscCall(PetscFree(idx)); 3247 *garray = cmap1; 3248 3249 PetscCall(VecDestroy(&x)); 3250 PetscCall(VecDestroy(&cmap)); 3251 PetscCall(VecDestroy(&lcmap)); 3252 PetscFunctionReturn(PETSC_SUCCESS); 3253 } 3254 3255 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3257 { 3258 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3259 Mat M = NULL; 3260 MPI_Comm comm; 3261 IS iscol_d, isrow_d, iscol_o; 3262 Mat Asub = NULL, Bsub = NULL; 3263 PetscInt n; 3264 3265 PetscFunctionBegin; 3266 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3267 3268 if (call == MAT_REUSE_MATRIX) { 3269 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3270 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3271 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3272 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3274 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3277 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3278 3279 /* Update diagonal and off-diagonal portions of submat */ 3280 asub = (Mat_MPIAIJ *)(*submat)->data; 3281 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3282 PetscCall(ISGetLocalSize(iscol_o, &n)); 3283 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3284 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3285 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3286 3287 } else { /* call == MAT_INITIAL_MATRIX) */ 3288 const PetscInt *garray; 3289 PetscInt BsubN; 3290 3291 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3292 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3293 3294 /* Create local submatrices Asub and Bsub */ 3295 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3296 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3297 3298 /* Create submatrix M */ 3299 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3300 3301 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3302 asub = (Mat_MPIAIJ *)M->data; 3303 3304 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3305 n = asub->B->cmap->N; 3306 if (BsubN > n) { 3307 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3308 const PetscInt *idx; 3309 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3310 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3311 3312 PetscCall(PetscMalloc1(n, &idx_new)); 3313 j = 0; 3314 PetscCall(ISGetIndices(iscol_o, &idx)); 3315 for (i = 0; i < n; i++) { 3316 if (j >= BsubN) break; 3317 while (subgarray[i] > garray[j]) j++; 3318 3319 if (subgarray[i] == garray[j]) { 3320 idx_new[i] = idx[j++]; 3321 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3322 } 3323 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3324 3325 PetscCall(ISDestroy(&iscol_o)); 3326 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3327 3328 } else if (BsubN < n) { 3329 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3330 } 3331 3332 PetscCall(PetscFree(garray)); 3333 *submat = M; 3334 3335 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d)); /* compose takes a reference; drop ours */

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateSubMatrix_MPIAIJ - dispatch point for extracting a parallel submatrix mat[isrow, iscol].

  Chooses among three implementations:
  - MatCreateSubMatrix_MPIAIJ_SameRowColDist(): both isrow and iscol fall entirely inside this
    process's ownership ranges on every process,
  - MatCreateSubMatrix_MPIAIJ_SameRowDist():    only isrow does, and the gathered sequential
    column IS is sorted,
  - MatCreateSubMatrix_MPIAIJ_nonscalable():    general fallback using a sequential IS with the
    global size of iscol.

  For MAT_REUSE_MATRIX the previous choice is recovered from the objects composed on *newmat
  during the MAT_INITIAL_MATRIX call ("isrow_d", "SubIScol", or "ISAllGather").
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Reuse path: the composed object tells us which branch created *newmat.
       NOTE(review): tsameDist[0] is never set in this branch; only tsameDist[1] is consulted below. */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE; /* an empty local IS trivially fits any ownership range */
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* All ranks must agree before taking a specialized (collective) code path */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col; /* borrow B's CSR arrays directly */
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local "diagonal" widths over all ranks */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* mark preallocated by hand: we install A and Bnew directly instead of calling a preallocation routine */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Translate B's local column indices to global indices, in place, via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Ownership transfer: B must NOT free the arrays now owned by Bnew, so clear its
     free flags before destroying it; then make Bnew responsible for freeing them. */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; assembly is purely local, so suppress off-process stash traffic */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *);

/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - extract mat[isrow, iscol] when isrow already matches
  mat's row ownership ranges on every process.

  iscol_local is a sorted sequential IS holding the global column indices of the submatrix
  (it may contain duplicates). It is NULL on MAT_REUSE_MATRIX calls, where the objects composed
  on *newmat ("SubMatrix", "SubIScol", "Subcmap") from the initial call are used instead.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects stashed by the MAT_INITIAL_MATRIX call and refresh Msub's values */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local)); /* shared, so take a reference */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      /* Merge-scan iscol_local against [cstart,cend) and the sorted garray: keep only columns
         this process owns (diagonal) or couples to (off-diagonal), recording for each kept
         column its global index (idx) and its column position within the submatrix (cmap1). */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank); /* spread remainder over low ranks */
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* one allocation, split into dlens | olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* map local Msub columns to global columns */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* collective agreement: every rank must take the same allcolumns path */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of
 local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank); /* spread the remainder over the low ranks */
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local widths gives each rank its [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation split as dlens | olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* walk jj/aa row by row; cwork/vwork point at the current row's indices/values */
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* compose holds a reference; drop ours */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMPIAIJSetPreallocationCSR_MPIAIJ - type-specific implementation behind
  MatMPIAIJSetPreallocationCSR(): preallocates B from local CSR arrays (Ii, J),
  inserts the values v (if given), assembles, and records per-row counts of
  entries left of the diagonal block in Aij->ld.
*/
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* Debug-only validation: row lengths non-negative; column indices (assumed sorted per row
     here — first entry checked against 0, last against N) within [0, N) */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* Count per-row nonzeros falling in the diagonal block [cstart,cend) vs off-diagonal */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* Insert the provided values row by row (v may be NULL; offsets are computed safely) */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
  }
  /* All entries are local, so temporarily disable off-process stash traffic for assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal (ld[i] = entries in row i with column < cstart;
     relies on each row of J being sorted) */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering..
 i.e for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on B (no-op if none) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation composed on B (no-op if none) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4155 4156 Collective 4157 4158 Input Parameters: 4159 + comm - MPI communicator 4160 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4161 . n - This value should be the same as the local size used in creating the 4162 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4163 calculated if N is given) For square matrices n is almost always m. 4164 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4165 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4166 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4167 . j - global column indices 4168 - a - optional matrix values 4169 4170 Output Parameter: 4171 . mat - the matrix 4172 4173 Level: intermediate 4174 4175 Notes: 4176 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4177 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4178 called this routine. Use `MatCreateMPIAIJWithSplitArray()` to avoid needing to copy the arrays. 4179 4180 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4181 4182 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4183 4184 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4185 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4186 4187 The format which is used for the sparse matrix input, is equivalent to a 4188 row-major ordering.. 
i.e for the following matrix, the input data expected is 4189 as shown 4190 .vb 4191 1 0 0 4192 2 0 3 P0 4193 ------- 4194 4 5 6 P1 4195 4196 Process0 [P0] rows_owned=[0,1] 4197 i = {0,1,3} [size = nrow+1 = 2+1] 4198 j = {0,0,2} [size = 3] 4199 v = {1,2,3} [size = 3] 4200 4201 Process1 [P1] rows_owned=[2] 4202 i = {0,3} [size = nrow+1 = 1+1] 4203 j = {0,1,2} [size = 3] 4204 v = {4,5,6} [size = 3] 4205 .ve 4206 4207 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4208 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4209 @*/ 4210 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4211 { 4212 PetscFunctionBegin; 4213 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4214 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4215 PetscCall(MatCreate(comm, mat)); 4216 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4217 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4218 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4219 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4220 PetscFunctionReturn(PETSC_SUCCESS); 4221 } 4222 4223 /*@ 4224 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4225 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4226 from `MatCreateMPIAIJWithArrays()` 4227 4228 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4229 4230 Collective 4231 4232 Input Parameters: 4233 + mat - the matrix 4234 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4235 . 
n - This value should be the same as the local size used in creating the 4236 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4237 calculated if N is given) For square matrices n is almost always m. 4238 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4239 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4240 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4241 . J - column indices 4242 - v - matrix values 4243 4244 Level: deprecated 4245 4246 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4247 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4248 @*/ 4249 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4250 { 4251 PetscInt nnz, i; 4252 PetscBool nooffprocentries; 4253 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4254 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4255 PetscScalar *ad, *ao; 4256 PetscInt ldi, Iii, md; 4257 const PetscInt *Adi = Ad->i; 4258 PetscInt *ld = Aij->ld; 4259 4260 PetscFunctionBegin; 4261 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4262 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4263 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4264 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4265 4266 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, 
&ad)); 4267 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4268 4269 for (i = 0; i < m; i++) { 4270 if (PetscDefined(USE_DEBUG)) { 4271 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4272 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4273 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4274 } 4275 } 4276 nnz = Ii[i + 1] - Ii[i]; 4277 Iii = Ii[i]; 4278 ldi = ld[i]; 4279 md = Adi[i + 1] - Adi[i]; 4280 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4281 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4282 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4283 ad += md; 4284 ao += nnz - md; 4285 } 4286 nooffprocentries = mat->nooffprocentries; 4287 mat->nooffprocentries = PETSC_TRUE; 4288 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4289 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4290 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4291 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4292 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4293 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4294 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4295 mat->nooffprocentries = nooffprocentries; 4296 PetscFunctionReturn(PETSC_SUCCESS); 4297 } 4298 4299 /*@ 4300 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4301 4302 Collective 4303 4304 Input Parameters: 4305 + mat - the matrix 4306 - v - matrix values, stored by row 4307 4308 Level: intermediate 4309 4310 Notes: 4311 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4312 4313 The column indices in the call 
to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4314 4315 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4316 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4317 @*/ 4318 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4319 { 4320 PetscInt nnz, i, m; 4321 PetscBool nooffprocentries; 4322 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4323 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4324 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4325 PetscScalar *ad, *ao; 4326 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4327 PetscInt ldi, Iii, md; 4328 PetscInt *ld = Aij->ld; 4329 4330 PetscFunctionBegin; 4331 m = mat->rmap->n; 4332 4333 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4334 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4335 Iii = 0; 4336 for (i = 0; i < m; i++) { 4337 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4338 ldi = ld[i]; 4339 md = Adi[i + 1] - Adi[i]; 4340 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4341 ad += md; 4342 if (ao) { 4343 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4344 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4345 ao += nnz - md; 4346 } 4347 Iii += nnz; 4348 } 4349 nooffprocentries = mat->nooffprocentries; 4350 mat->nooffprocentries = PETSC_TRUE; 4351 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4352 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4353 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4354 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4355 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4356 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4357 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4358 mat->nooffprocentries = nooffprocentries; 4359 
PetscFunctionReturn(PETSC_SUCCESS); 4360 } 4361 4362 /*@C 4363 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4364 (the default parallel PETSc format). For good matrix assembly performance 4365 the user should preallocate the matrix storage by setting the parameters 4366 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4367 4368 Collective 4369 4370 Input Parameters: 4371 + comm - MPI communicator 4372 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4373 This value should be the same as the local size used in creating the 4374 y vector for the matrix-vector product y = Ax. 4375 . n - This value should be the same as the local size used in creating the 4376 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4377 calculated if N is given) For square matrices n is almost always m. 4378 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4379 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4380 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4381 (same value is used for all local rows) 4382 . d_nnz - array containing the number of nonzeros in the various rows of the 4383 DIAGONAL portion of the local submatrix (possibly different for each row) 4384 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4385 The size of this array is equal to the number of local rows, i.e 'm'. 4386 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4387 submatrix (same value is used for all local rows). 4388 - o_nnz - array containing the number of nonzeros in the various rows of the 4389 OFF-DIAGONAL portion of the local submatrix (possibly different for 4390 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4391 structure. The size of this array is equal to the number 4392 of local rows, i.e 'm'. 4393 4394 Output Parameter: 4395 . 
A - the matrix 4396 4397 Options Database Keys: 4398 + -mat_no_inode - Do not use inodes 4399 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4400 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4401 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4402 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4403 4404 Level: intermediate 4405 4406 Notes: 4407 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4408 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4409 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4410 4411 If the *_nnz parameter is given then the *_nz parameter is ignored 4412 4413 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4414 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4415 storage requirements for this matrix. 4416 4417 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4418 processor than it must be used on all processors that share the object for 4419 that argument. 4420 4421 The user MUST specify either the local or global matrix dimensions 4422 (possibly both). 4423 4424 The parallel matrix is partitioned across processors such that the 4425 first m0 rows belong to process 0, the next m1 rows belong to 4426 process 1, the next m2 rows belong to process 2 etc.. where 4427 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4428 values corresponding to [m x N] submatrix. 4429 4430 The columns are logically partitioned with the n0 columns belonging 4431 to 0th partition, the next n1 columns belonging to the next 4432 partition etc.. where n0,n1,n2... are the input parameter 'n'. 
4433 4434 The DIAGONAL portion of the local submatrix on any given processor 4435 is the submatrix corresponding to the rows and columns m,n 4436 corresponding to the given processor. i.e diagonal matrix on 4437 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4438 etc. The remaining portion of the local submatrix [m x (N-n)] 4439 constitute the OFF-DIAGONAL portion. The example below better 4440 illustrates this concept. 4441 4442 For a square global matrix we define each processor's diagonal portion 4443 to be its local rows and the corresponding columns (a square submatrix); 4444 each processor's off-diagonal portion encompasses the remainder of the 4445 local matrix (a rectangular submatrix). 4446 4447 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4448 4449 When calling this routine with a single process communicator, a matrix of 4450 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4451 type of communicator, use the construction mechanism 4452 .vb 4453 MatCreate(..., &A); 4454 MatSetType(A, MATMPIAIJ); 4455 MatSetSizes(A, m, n, M, N); 4456 MatMPIAIJSetPreallocation(A, ...); 4457 .ve 4458 4459 By default, this format uses inodes (identical nodes) when possible. 4460 We search for consecutive rows with the same nonzero structure, thereby 4461 reusing matrix information to achieve increased efficiency. 4462 4463 Example Usage: 4464 Consider the following 8x8 matrix with 34 non-zero values, that is 4465 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4466 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4467 as follows 4468 4469 .vb 4470 1 2 0 | 0 3 0 | 0 4 4471 Proc0 0 5 6 | 7 0 0 | 8 0 4472 9 0 10 | 11 0 0 | 12 0 4473 ------------------------------------- 4474 13 0 14 | 15 16 17 | 0 0 4475 Proc1 0 18 0 | 19 20 21 | 0 0 4476 0 0 0 | 22 23 0 | 24 0 4477 ------------------------------------- 4478 Proc2 25 26 27 | 0 0 28 | 29 0 4479 30 0 0 | 31 32 33 | 0 34 4480 .ve 4481 4482 This can be represented as a collection of submatrices as 4483 4484 .vb 4485 A B C 4486 D E F 4487 G H I 4488 .ve 4489 4490 Where the submatrices A,B,C are owned by proc0, D,E,F are 4491 owned by proc1, G,H,I are owned by proc2. 4492 4493 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4494 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4495 The 'M','N' parameters are 8,8, and have the same values on all procs. 4496 4497 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4498 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4499 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4500 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4501 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4502 matrix, ans [DF] as another SeqAIJ matrix. 4503 4504 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4505 allocated for every row of the local diagonal submatrix, and `o_nz` 4506 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4507 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4508 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4509 In this case, the values of `d_nz`,`o_nz` are 4510 .vb 4511 proc0 dnz = 2, o_nz = 2 4512 proc1 dnz = 3, o_nz = 2 4513 proc2 dnz = 1, o_nz = 4 4514 .ve 4515 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. 
This 4516 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4517 for proc3. i.e we are using 12+15+10=37 storage locations to store 4518 34 values. 4519 4520 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4521 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4522 In the above case the values for d_nnz,o_nnz are 4523 .vb 4524 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4525 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4526 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4527 .ve 4528 Here the space allocated is sum of all the above values i.e 34, and 4529 hence pre-allocation is perfect. 4530 4531 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4532 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4533 @*/ 4534 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4535 { 4536 PetscMPIInt size; 4537 4538 PetscFunctionBegin; 4539 PetscCall(MatCreate(comm, A)); 4540 PetscCall(MatSetSizes(*A, m, n, M, N)); 4541 PetscCallMPI(MPI_Comm_size(comm, &size)); 4542 if (size > 1) { 4543 PetscCall(MatSetType(*A, MATMPIAIJ)); 4544 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4545 } else { 4546 PetscCall(MatSetType(*A, MATSEQAIJ)); 4547 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4548 } 4549 PetscFunctionReturn(PETSC_SUCCESS); 4550 } 4551 4552 /*MC 4553 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4554 4555 Synopsis: 4556 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4557 4558 Not Collective 4559 4560 Input Parameter: 4561 . A - the `MATMPIAIJ` matrix 4562 4563 Output Parameters: 4564 + Ad - the diagonal portion of the matrix 4565 . 
Ao - the off-diagonal portion of the matrix 4566 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4567 - ierr - error code 4568 4569 Level: advanced 4570 4571 Note: 4572 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4573 4574 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4575 M*/ 4576 4577 /*MC 4578 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4579 4580 Synopsis: 4581 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4582 4583 Not Collective 4584 4585 Input Parameters: 4586 + A - the `MATMPIAIJ` matrix 4587 . Ad - the diagonal portion of the matrix 4588 . Ao - the off-diagonal portion of the matrix 4589 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4590 - ierr - error code 4591 4592 Level: advanced 4593 4594 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4595 M*/ 4596 4597 /*@C 4598 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4599 4600 Not Collective 4601 4602 Input Parameter: 4603 . A - The `MATMPIAIJ` matrix 4604 4605 Output Parameters: 4606 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4607 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4608 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4609 4610 Level: intermediate 4611 4612 Note: 4613 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4614 in `Ad` are in [0, Nc) where Nc is the number of local columns. 
The columns are `Ao` are in [0, Nco), where Nco is 4615 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4616 local column numbers to global column numbers in the original matrix. 4617 4618 Fortran Notes: 4619 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4620 4621 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4622 @*/ 4623 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4624 { 4625 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4626 PetscBool flg; 4627 4628 PetscFunctionBegin; 4629 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4630 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4631 if (Ad) *Ad = a->A; 4632 if (Ao) *Ao = a->B; 4633 if (colmap) *colmap = a->garray; 4634 PetscFunctionReturn(PETSC_SUCCESS); 4635 } 4636 4637 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4638 { 4639 PetscInt m, N, i, rstart, nnz, Ii; 4640 PetscInt *indx; 4641 PetscScalar *values; 4642 MatType rootType; 4643 4644 PetscFunctionBegin; 4645 PetscCall(MatGetSize(inmat, &m, &N)); 4646 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4647 PetscInt *dnz, *onz, sum, bs, cbs; 4648 4649 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4650 /* Check sum(n) = N */ 4651 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4652 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4653 4654 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4655 rstart -= m; 4656 4657 MatPreallocateBegin(comm, 
m, n, dnz, onz); 4658 for (i = 0; i < m; i++) { 4659 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4660 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4661 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4662 } 4663 4664 PetscCall(MatCreate(comm, outmat)); 4665 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4666 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4667 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4668 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4669 PetscCall(MatSetType(*outmat, rootType)); 4670 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4671 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4672 MatPreallocateEnd(dnz, onz); 4673 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4674 } 4675 4676 /* numeric phase */ 4677 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4678 for (i = 0; i < m; i++) { 4679 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4680 Ii = i + rstart; 4681 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4682 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4683 } 4684 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4685 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4686 PetscFunctionReturn(PETSC_SUCCESS); 4687 } 4688 4689 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4690 { 4691 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4692 4693 PetscFunctionBegin; 4694 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4695 PetscCall(PetscFree(merge->id_r)); 4696 PetscCall(PetscFree(merge->len_s)); 4697 PetscCall(PetscFree(merge->len_r)); 4698 PetscCall(PetscFree(merge->bi)); 4699 PetscCall(PetscFree(merge->bj)); 4700 PetscCall(PetscFree(merge->buf_ri[0])); 4701 PetscCall(PetscFree(merge->buf_ri)); 4702 PetscCall(PetscFree(merge->buf_rj[0])); 4703 PetscCall(PetscFree(merge->buf_rj)); 4704 
PetscCall(PetscFree(merge->coi)); 4705 PetscCall(PetscFree(merge->coj)); 4706 PetscCall(PetscFree(merge->owners_co)); 4707 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4708 PetscCall(PetscFree(merge)); 4709 PetscFunctionReturn(PETSC_SUCCESS); 4710 } 4711 4712 #include <../src/mat/utils/freespace.h> 4713 #include <petscbt.h> 4714 4715 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4716 { 4717 MPI_Comm comm; 4718 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4719 PetscMPIInt size, rank, taga, *len_s; 4720 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4721 PetscInt proc, m; 4722 PetscInt **buf_ri, **buf_rj; 4723 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4724 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4725 MPI_Request *s_waits, *r_waits; 4726 MPI_Status *status; 4727 const MatScalar *aa, *a_a; 4728 MatScalar **abuf_r, *ba_i; 4729 Mat_Merge_SeqsToMPI *merge; 4730 PetscContainer container; 4731 4732 PetscFunctionBegin; 4733 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4734 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4735 4736 PetscCallMPI(MPI_Comm_size(comm, &size)); 4737 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4738 4739 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4740 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4741 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4742 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4743 aa = a_a; 4744 4745 bi = merge->bi; 4746 bj = merge->bj; 4747 buf_ri = merge->buf_ri; 4748 buf_rj = merge->buf_rj; 4749 4750 PetscCall(PetscMalloc1(size, &status)); 4751 owners = merge->rowmap->range; 4752 len_s = merge->len_s; 4753 4754 /* send and recv matrix values */ 4755 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4756 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, 
merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4757 4758 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4759 for (proc = 0, k = 0; proc < size; proc++) { 4760 if (!len_s[proc]) continue; 4761 i = owners[proc]; 4762 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4763 k++; 4764 } 4765 4766 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4767 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4768 PetscCall(PetscFree(status)); 4769 4770 PetscCall(PetscFree(s_waits)); 4771 PetscCall(PetscFree(r_waits)); 4772 4773 /* insert mat values of mpimat */ 4774 PetscCall(PetscMalloc1(N, &ba_i)); 4775 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4776 4777 for (k = 0; k < merge->nrecv; k++) { 4778 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4779 nrows = *(buf_ri_k[k]); 4780 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4781 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4782 } 4783 4784 /* set values of ba */ 4785 m = merge->rowmap->n; 4786 for (i = 0; i < m; i++) { 4787 arow = owners[rank] + i; 4788 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4789 bnzi = bi[i + 1] - bi[i]; 4790 PetscCall(PetscArrayzero(ba_i, bnzi)); 4791 4792 /* add local non-zero vals of this proc's seqmat into ba */ 4793 anzi = ai[arow + 1] - ai[arow]; 4794 aj = a->j + ai[arow]; 4795 aa = a_a + ai[arow]; 4796 nextaj = 0; 4797 for (j = 0; nextaj < anzi; j++) { 4798 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4799 ba_i[j] += aa[nextaj++]; 4800 } 4801 } 4802 4803 /* add received vals into ba */ 4804 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4805 /* i-th row */ 4806 if (i == *nextrow[k]) { 4807 anzi = *(nextai[k] + 1) - *nextai[k]; 4808 aj = buf_rj[k] + *(nextai[k]); 4809 aa = abuf_r[k] + 
*(nextai[k]); 4810 nextaj = 0; 4811 for (j = 0; nextaj < anzi; j++) { 4812 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4813 ba_i[j] += aa[nextaj++]; 4814 } 4815 } 4816 nextrow[k]++; 4817 nextai[k]++; 4818 } 4819 } 4820 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4821 } 4822 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4823 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4824 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4825 4826 PetscCall(PetscFree(abuf_r[0])); 4827 PetscCall(PetscFree(abuf_r)); 4828 PetscCall(PetscFree(ba_i)); 4829 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4830 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4831 PetscFunctionReturn(PETSC_SUCCESS); 4832 } 4833 4834 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4835 { 4836 Mat B_mpi; 4837 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4838 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4839 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4840 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4841 PetscInt len, proc, *dnz, *onz, bs, cbs; 4842 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4843 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4844 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4845 MPI_Status *status; 4846 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4847 PetscBT lnkbt; 4848 Mat_Merge_SeqsToMPI *merge; 4849 PetscContainer container; 4850 4851 PetscFunctionBegin; 4852 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4853 4854 /* make sure it is a PETSc comm */ 4855 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4856 PetscCallMPI(MPI_Comm_size(comm, &size)); 4857 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4858 4859 PetscCall(PetscNew(&merge)); 4860 PetscCall(PetscMalloc1(size, &status)); 4861 4862 /* determine row ownership */ 
4863 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4864 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4865 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4866 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4867 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4868 PetscCall(PetscMalloc1(size, &len_si)); 4869 PetscCall(PetscMalloc1(size, &merge->len_s)); 4870 4871 m = merge->rowmap->n; 4872 owners = merge->rowmap->range; 4873 4874 /* determine the number of messages to send, their lengths */ 4875 len_s = merge->len_s; 4876 4877 len = 0; /* length of buf_si[] */ 4878 merge->nsend = 0; 4879 for (proc = 0; proc < size; proc++) { 4880 len_si[proc] = 0; 4881 if (proc == rank) { 4882 len_s[proc] = 0; 4883 } else { 4884 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4885 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4886 } 4887 if (len_s[proc]) { 4888 merge->nsend++; 4889 nrows = 0; 4890 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4891 if (ai[i + 1] > ai[i]) nrows++; 4892 } 4893 len_si[proc] = 2 * (nrows + 1); 4894 len += len_si[proc]; 4895 } 4896 } 4897 4898 /* determine the number and length of messages to receive for ij-structure */ 4899 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4900 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4901 4902 /* post the Irecv of j-structure */ 4903 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4904 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4905 4906 /* post the Isend of j-structure */ 4907 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4908 4909 for (proc = 0, k = 0; proc < size; proc++) { 4910 if (!len_s[proc]) continue; 4911 i = owners[proc]; 4912 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4913 k++; 4914 } 4915 4916 
/* receives and sends of j-structure are complete */ 4917 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4918 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4919 4920 /* send and recv i-structure */ 4921 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4922 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4923 4924 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4925 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4926 for (proc = 0, k = 0; proc < size; proc++) { 4927 if (!len_s[proc]) continue; 4928 /* form outgoing message for i-structure: 4929 buf_si[0]: nrows to be sent 4930 [1:nrows]: row index (global) 4931 [nrows+1:2*nrows+1]: i-structure index 4932 */ 4933 nrows = len_si[proc] / 2 - 1; 4934 buf_si_i = buf_si + nrows + 1; 4935 buf_si[0] = nrows; 4936 buf_si_i[0] = 0; 4937 nrows = 0; 4938 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4939 anzi = ai[i + 1] - ai[i]; 4940 if (anzi) { 4941 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4942 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4943 nrows++; 4944 } 4945 } 4946 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4947 k++; 4948 buf_si += len_si[proc]; 4949 } 4950 4951 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4952 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4953 4954 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4955 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4956 4957 PetscCall(PetscFree(len_si)); 4958 PetscCall(PetscFree(len_ri)); 4959 PetscCall(PetscFree(rj_waits)); 4960 PetscCall(PetscFree2(si_waits, sj_waits)); 4961 PetscCall(PetscFree(ri_waits)); 4962 PetscCall(PetscFree(buf_s)); 4963 
PetscCall(PetscFree(status)); 4964 4965 /* compute a local seq matrix in each processor */ 4966 /* allocate bi array and free space for accumulating nonzero column info */ 4967 PetscCall(PetscMalloc1(m + 1, &bi)); 4968 bi[0] = 0; 4969 4970 /* create and initialize a linked list */ 4971 nlnk = N + 1; 4972 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4973 4974 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4975 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4976 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4977 4978 current_space = free_space; 4979 4980 /* determine symbolic info for each local row */ 4981 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4982 4983 for (k = 0; k < merge->nrecv; k++) { 4984 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4985 nrows = *buf_ri_k[k]; 4986 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4987 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4988 } 4989 4990 MatPreallocateBegin(comm, m, n, dnz, onz); 4991 len = 0; 4992 for (i = 0; i < m; i++) { 4993 bnzi = 0; 4994 /* add local non-zero cols of this proc's seqmat into lnk */ 4995 arow = owners[rank] + i; 4996 anzi = ai[arow + 1] - ai[arow]; 4997 aj = a->j + ai[arow]; 4998 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4999 bnzi += nlnk; 5000 /* add received col data into lnk */ 5001 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5002 if (i == *nextrow[k]) { /* i-th row */ 5003 anzi = *(nextai[k] + 1) - *nextai[k]; 5004 aj = buf_rj[k] + *nextai[k]; 5005 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5006 bnzi += nlnk; 5007 nextrow[k]++; 5008 nextai[k]++; 5009 } 5010 } 5011 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5012 5013 /* if free space is not available, make more free space */ 5014 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5015 /* copy data into free space, then initialize lnk */ 5016 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5017 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5018 5019 current_space->array += bnzi; 5020 current_space->local_used += bnzi; 5021 current_space->local_remaining -= bnzi; 5022 5023 bi[i + 1] = bi[i] + bnzi; 5024 } 5025 5026 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5027 5028 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5029 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5030 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5031 5032 /* create symbolic parallel matrix B_mpi */ 5033 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5034 PetscCall(MatCreate(comm, &B_mpi)); 5035 if (n == PETSC_DECIDE) { 5036 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5037 } else { 5038 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5039 } 5040 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5041 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5042 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5043 MatPreallocateEnd(dnz, onz); 5044 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5045 5046 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5047 B_mpi->assembled = PETSC_FALSE; 5048 merge->bi = bi; 5049 merge->bj = bj; 5050 merge->buf_ri = buf_ri; 5051 merge->buf_rj = buf_rj; 5052 merge->coi = NULL; 5053 merge->coj = NULL; 5054 merge->owners_co = NULL; 5055 5056 PetscCall(PetscCommDestroy(&comm)); 5057 5058 /* attach the supporting struct to B_mpi for reuse */ 5059 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5060 PetscCall(PetscContainerSetPointer(container, merge)); 5061 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5062 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5063 PetscCall(PetscContainerDestroy(&container)); 5064 *mpimat = B_mpi; 5065 5066 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5067 PetscFunctionReturn(PETSC_SUCCESS); 5068 } 5069 5070 /*@C 5071 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5072 matrices from each processor 5073 5074 Collective 5075 5076 Input Parameters: 5077 + comm - the communicators the parallel matrix will live on 5078 . seqmat - the input sequential matrices 5079 . m - number of local rows (or `PETSC_DECIDE`) 5080 . n - number of local columns (or `PETSC_DECIDE`) 5081 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5082 5083 Output Parameter: 5084 . mpimat - the parallel matrix generated 5085 5086 Level: advanced 5087 5088 Note: 5089 The dimensions of the sequential matrix in each processor MUST be the same. 5090 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5091 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5092 5093 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5094 @*/ 5095 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5096 { 5097 PetscMPIInt size; 5098 5099 PetscFunctionBegin; 5100 PetscCallMPI(MPI_Comm_size(comm, &size)); 5101 if (size == 1) { 5102 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5103 if (scall == MAT_INITIAL_MATRIX) { 5104 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5105 } else { 5106 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5107 } 5108 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5109 PetscFunctionReturn(PETSC_SUCCESS); 5110 } 5111 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5112 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5113 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5114 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5115 PetscFunctionReturn(PETSC_SUCCESS); 5116 } 5117 5118 /*@ 5119 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5120 5121 Not Collective 5122 5123 Input Parameter: 5124 . A - the matrix 5125 5126 Output Parameter: 5127 . A_loc - the local sequential matrix generated 5128 5129 Level: developer 5130 5131 Notes: 5132 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5133 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5134 `n` is the global column count obtained with `MatGetSize()` 5135 5136 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5137 5138 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
  Destroy the matrix with `MatDestroy()`

.seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    /* sequential input: hand back the same matrix with an extra reference */
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.

  Not Collective

  Input Parameters:
+ A     - the matrix
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single rank: the diagonal block already is the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walking cursors over the value arrays; aav/bav keep the originals for restore */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds the diag and off-diag entries of row i, in global column order:
       off-diag cols < cstart, then diag cols, then off-diag cols >= cstart */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure assumed unchanged since the MAT_INITIAL_MATRIX call; only values are refilled */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ`
matrix by taking all its local rows and putting them into a sequential matrix with 5294 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5295 5296 Not Collective 5297 5298 Input Parameters: 5299 + A - the matrix 5300 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5301 5302 Output Parameters: 5303 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5304 - A_loc - the local sequential matrix generated 5305 5306 Level: developer 5307 5308 Note: 5309 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5310 part, then those associated with the off-diagonal part (in its local ordering) 5311 5312 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5313 @*/ 5314 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5315 { 5316 Mat Ao, Ad; 5317 const PetscInt *cmap; 5318 PetscMPIInt size; 5319 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5320 5321 PetscFunctionBegin; 5322 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5323 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5324 if (size == 1) { 5325 if (scall == MAT_INITIAL_MATRIX) { 5326 PetscCall(PetscObjectReference((PetscObject)Ad)); 5327 *A_loc = Ad; 5328 } else if (scall == MAT_REUSE_MATRIX) { 5329 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5330 } 5331 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5332 PetscFunctionReturn(PETSC_SUCCESS); 5333 } 5334 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5335 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5336 if (f) { 5337 PetscCall((*f)(A, scall, glob, A_loc)); 5338 } else { 5339 
Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5340 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5341 Mat_SeqAIJ *c; 5342 PetscInt *ai = a->i, *aj = a->j; 5343 PetscInt *bi = b->i, *bj = b->j; 5344 PetscInt *ci, *cj; 5345 const PetscScalar *aa, *ba; 5346 PetscScalar *ca; 5347 PetscInt i, j, am, dn, on; 5348 5349 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5350 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5351 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5352 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5353 if (scall == MAT_INITIAL_MATRIX) { 5354 PetscInt k; 5355 PetscCall(PetscMalloc1(1 + am, &ci)); 5356 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5357 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5358 ci[0] = 0; 5359 for (i = 0, k = 0; i < am; i++) { 5360 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5361 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5362 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5363 /* diagonal portion of A */ 5364 for (j = 0; j < ncols_d; j++, k++) { 5365 cj[k] = *aj++; 5366 ca[k] = *aa++; 5367 } 5368 /* off-diagonal portion of A */ 5369 for (j = 0; j < ncols_o; j++, k++) { 5370 cj[k] = dn + *bj++; 5371 ca[k] = *ba++; 5372 } 5373 } 5374 /* put together the new matrix */ 5375 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5376 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5377 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5378 c = (Mat_SeqAIJ *)(*A_loc)->data; 5379 c->free_a = PETSC_TRUE; 5380 c->free_ij = PETSC_TRUE; 5381 c->nonew = 0; 5382 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5383 } else if (scall == MAT_REUSE_MATRIX) { 5384 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5385 for (i = 0; i < am; i++) { 5386 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5387 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5388 /* diagonal portion of A */ 5389 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5390 /* off-diagonal portion of A */ 5391 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5392 } 5393 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5394 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5395 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5396 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5397 if (glob) { 5398 PetscInt cst, *gidx; 5399 5400 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5401 PetscCall(PetscMalloc1(dn + on, &gidx)); 5402 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5403 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5404 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5405 } 5406 } 5407 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5408 PetscFunctionReturn(PETSC_SUCCESS); 5409 } 5410 5411 /*@C 5412 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5413 5414 Not Collective 5415 5416 Input Parameters: 5417 + A - the matrix 5418 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5419 . row - index set of rows to extract (or `NULL`) 5420 - col - index set of columns to extract (or `NULL`) 5421 5422 Output Parameter: 5423 . 
A_loc - the local sequential matrix generated 5424 5425 Level: developer 5426 5427 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5428 @*/ 5429 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5430 { 5431 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5432 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5433 IS isrowa, iscola; 5434 Mat *aloc; 5435 PetscBool match; 5436 5437 PetscFunctionBegin; 5438 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5439 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5440 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5441 if (!row) { 5442 start = A->rmap->rstart; 5443 end = A->rmap->rend; 5444 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5445 } else { 5446 isrowa = *row; 5447 } 5448 if (!col) { 5449 start = A->cmap->rstart; 5450 cmap = a->garray; 5451 nzA = a->A->cmap->n; 5452 nzB = a->B->cmap->n; 5453 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5454 ncols = 0; 5455 for (i = 0; i < nzB; i++) { 5456 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5457 else break; 5458 } 5459 imark = i; 5460 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5461 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5462 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5463 } else { 5464 iscola = *col; 5465 } 5466 if (scall != MAT_INITIAL_MATRIX) { 5467 PetscCall(PetscMalloc1(1, &aloc)); 5468 aloc[0] = *A_loc; 5469 } 5470 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5471 if (!col) { /* attach global id of condensed columns */ 5472 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5473 } 5474 *A_loc = aloc[0]; 5475 PetscCall(PetscFree(aloc)); 5476 if (!row) PetscCall(ISDestroy(&isrowa)); 5477 if (!col) 
PetscCall(ISDestroy(&iscola)); 5478 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5479 PetscFunctionReturn(PETSC_SUCCESS); 5480 } 5481 5482 /* 5483 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5484 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5485 * on a global size. 5486 * */ 5487 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5488 { 5489 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5490 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5491 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5492 PetscMPIInt owner; 5493 PetscSFNode *iremote, *oiremote; 5494 const PetscInt *lrowindices; 5495 PetscSF sf, osf; 5496 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5497 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5498 MPI_Comm comm; 5499 ISLocalToGlobalMapping mapping; 5500 const PetscScalar *pd_a, *po_a; 5501 5502 PetscFunctionBegin; 5503 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5504 /* plocalsize is the number of roots 5505 * nrows is the number of leaves 5506 * */ 5507 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5508 PetscCall(ISGetLocalSize(rows, &nrows)); 5509 PetscCall(PetscCalloc1(nrows, &iremote)); 5510 PetscCall(ISGetIndices(rows, &lrowindices)); 5511 for (i = 0; i < nrows; i++) { 5512 /* Find a remote index and an owner for a row 5513 * The row could be local or remote 5514 * */ 5515 owner = 0; 5516 lidx = 0; 5517 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5518 iremote[i].index = lidx; 5519 iremote[i].rank = owner; 5520 } 5521 /* Create SF to communicate how many nonzero columns for each row */ 5522 PetscCall(PetscSFCreate(comm, &sf)); 5523 /* SF will figure out the number of nonzero columns for each row, and their 5524 * offsets 5525 * */ 5526 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5527 PetscCall(PetscSFSetFromOptions(sf)); 5528 PetscCall(PetscSFSetUp(sf)); 5529 5530 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5531 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5532 PetscCall(PetscCalloc1(nrows, &pnnz)); 5533 roffsets[0] = 0; 5534 roffsets[1] = 0; 5535 for (i = 0; i < plocalsize; i++) { 5536 /* diagonal */ 5537 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5538 /* off-diagonal */ 5539 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5540 /* compute offsets so that we relative location for each row */ 5541 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5542 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5543 } 5544 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5545 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5546 /* 'r' means root, and 'l' means leaf */ 5547 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5548 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5549 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5550 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5551 PetscCall(PetscSFDestroy(&sf)); 5552 PetscCall(PetscFree(roffsets)); 5553 PetscCall(PetscFree(nrcols)); 5554 dntotalcols = 0; 5555 ontotalcols = 0; 5556 ncol = 0; 5557 for (i = 0; i < nrows; i++) { 5558 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5559 ncol = PetscMax(pnnz[i], ncol); 5560 /* diagonal */ 5561 dntotalcols += nlcols[i * 2 + 0]; 5562 /* off-diagonal */ 5563 ontotalcols += nlcols[i * 2 + 1]; 5564 } 5565 /* We do not need to figure the right number of columns 5566 * since all the calculations will be done by going through the raw data 5567 * */ 5568 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5569 PetscCall(MatSetUp(*P_oth)); 5570 PetscCall(PetscFree(pnnz)); 5571 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5572 /* diagonal */ 5573 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5574 /* off-diagonal */ 5575 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5576 /* diagonal */ 5577 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5578 /* off-diagonal */ 5579 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5580 dntotalcols = 0; 5581 ontotalcols = 0; 5582 ntotalcols = 0; 5583 for (i = 0; i < nrows; i++) { 5584 owner = 0; 5585 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5586 /* Set iremote for diag matrix */ 5587 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5588 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5589 iremote[dntotalcols].rank = owner; 5590 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5591 ilocal[dntotalcols++] = ntotalcols++; 5592 } 5593 /* off-diagonal */ 5594 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5595 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5596 oiremote[ontotalcols].rank = owner; 5597 oilocal[ontotalcols++] = ntotalcols++; 5598 } 5599 } 5600 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5601 PetscCall(PetscFree(loffsets)); 5602 PetscCall(PetscFree(nlcols)); 5603 PetscCall(PetscSFCreate(comm, &sf)); 5604 /* P serves as roots and P_oth is leaves 5605 * Diag matrix 5606 * */ 5607 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5608 PetscCall(PetscSFSetFromOptions(sf)); 5609 PetscCall(PetscSFSetUp(sf)); 5610 5611 PetscCall(PetscSFCreate(comm, &osf)); 5612 /* off-diagonal */ 5613 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5614 PetscCall(PetscSFSetFromOptions(osf)); 5615 PetscCall(PetscSFSetUp(osf)); 5616 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5617 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5618 /* operate on the matrix internal data to save memory */ 5619 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5620 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5621 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5622 /* Convert to global indices for diag matrix */ 5623 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5624 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5625 /* We want P_oth store global indices */ 5626 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5627 /* Use memory scalable approach */ 5628 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5629 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5630 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5631 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5632 /* Convert back to local indices */ 5633 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5634 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5635 nout = 0; 5636 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5637 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5638 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5639 /* Exchange values */ 5640 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5641 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5642 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5643 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5644 /* Stop PETSc from shrinking memory */ 5645 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5646 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5647 PetscCall(MatAssemblyEnd(*P_oth, 
  MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 *
 * A - matrix whose off-diagonal columns select the rows of P to gather
 * P - matrix whose rows are gathered into the sequential matrix *P_oth
 * dof - number of dofs per node; off-diagonal column indices of A are divided by dof to
 *       produce node keys (several columns of A may map to the same row of P)
 * reuse - MAT_INITIAL_MATRIX builds *P_oth and attaches the SFs; MAT_REUSE_MATRIX only
 *         refreshes numerical values through the previously attached SFs
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp; /* hash map: node key -> running count of unique keys */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* collapse dof consecutive columns onto one node key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    /* map: i-th off-diagonal column of A -> row of *P_oth (PETSC_OWN_POINTER: IS takes mapping) */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Keys come out of the hash map unordered; sort so the row IS is monotone */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place; the two SFs scatter P's diag and offdiag values into
       p_oth->a (presumably disjoint slots set up at creation time -- see the
       MAT_INITIAL_MATRIX path of the creator; TODO confirm) */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* The columns of A and rows of B must be conformally partitioned across ranks */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS as the sorted union of A's local columns and its off-diagonal
       columns (garray is sorted, so splice it around the local range) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller when requested; otherwise destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5823 5824 Level: developer 5825 5826 */ 5827 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5828 { 5829 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5830 Mat_SeqAIJ *b_oth; 5831 VecScatter ctx; 5832 MPI_Comm comm; 5833 const PetscMPIInt *rprocs, *sprocs; 5834 const PetscInt *srow, *rstarts, *sstarts; 5835 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5836 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5837 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5838 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5839 PetscMPIInt size, tag, rank, nreqs; 5840 5841 PetscFunctionBegin; 5842 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5843 PetscCallMPI(MPI_Comm_size(comm, &size)); 5844 5845 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5846 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5847 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5848 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5849 5850 if (size == 1) { 5851 startsj_s = NULL; 5852 bufa_ptr = NULL; 5853 *B_oth = NULL; 5854 PetscFunctionReturn(PETSC_SUCCESS); 5855 } 5856 5857 ctx = a->Mvctx; 5858 tag = ((PetscObject)ctx)->tag; 5859 5860 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5861 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5862 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5863 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5864 PetscCall(PetscMalloc1(nreqs, &reqs)); 5865 rwaits = reqs; 5866 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5867 5868 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5869 if (scall == MAT_INITIAL_MATRIX) { 5870 /* i-array */ 5871 /* post receives */ 5872 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5873 for (i = 0; i < nrecvs; i++) { 5874 rowlen = rvalues + rstarts[i] * rbs; 5875 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5876 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5877 } 5878 5879 /* pack the outgoing message */ 5880 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5881 5882 sstartsj[0] = 0; 5883 rstartsj[0] = 0; 5884 len = 0; /* total length of j or a array to be sent */ 5885 if (nsends) { 5886 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5887 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5888 } 5889 for (i = 0; i < nsends; i++) { 5890 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5891 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5892 for (j = 0; j < nrows; j++) { 5893 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5894 for (l = 0; l < sbs; l++) { 5895 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5896 5897 rowlen[j * sbs + l] = ncols; 5898 5899 len += ncols; 5900 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5901 } 5902 k++; 5903 } 5904 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5905 5906 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5907 } 5908 /* recvs and sends of i-array are completed */ 5909 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5910 
PetscCall(PetscFree(svalues)); 5911 5912 /* allocate buffers for sending j and a arrays */ 5913 PetscCall(PetscMalloc1(len + 1, &bufj)); 5914 PetscCall(PetscMalloc1(len + 1, &bufa)); 5915 5916 /* create i-array of B_oth */ 5917 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5918 5919 b_othi[0] = 0; 5920 len = 0; /* total length of j or a array to be received */ 5921 k = 0; 5922 for (i = 0; i < nrecvs; i++) { 5923 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5924 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5925 for (j = 0; j < nrows; j++) { 5926 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5927 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5928 k++; 5929 } 5930 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5931 } 5932 PetscCall(PetscFree(rvalues)); 5933 5934 /* allocate space for j and a arrays of B_oth */ 5935 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5936 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5937 5938 /* j-array */ 5939 /* post receives of j-array */ 5940 for (i = 0; i < nrecvs; i++) { 5941 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5942 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5943 } 5944 5945 /* pack the outgoing message j-array */ 5946 if (nsends) k = sstarts[0]; 5947 for (i = 0; i < nsends; i++) { 5948 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5949 bufJ = bufj + sstartsj[i]; 5950 for (j = 0; j < nrows; j++) { 5951 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5952 for (ll = 0; ll < sbs; ll++) { 5953 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5954 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5955 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5956 } 5957 } 5958 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5959 } 5960 5961 /* recvs 
and sends of j-array are completed */ 5962 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5963 } else if (scall == MAT_REUSE_MATRIX) { 5964 sstartsj = *startsj_s; 5965 rstartsj = *startsj_r; 5966 bufa = *bufa_ptr; 5967 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5968 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5969 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5970 5971 /* a-array */ 5972 /* post receives of a-array */ 5973 for (i = 0; i < nrecvs; i++) { 5974 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5975 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5976 } 5977 5978 /* pack the outgoing message a-array */ 5979 if (nsends) k = sstarts[0]; 5980 for (i = 0; i < nsends; i++) { 5981 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5982 bufA = bufa + sstartsj[i]; 5983 for (j = 0; j < nrows; j++) { 5984 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5985 for (ll = 0; ll < sbs; ll++) { 5986 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5987 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5988 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5989 } 5990 } 5991 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5992 } 5993 /* recvs and sends of a-array are completed */ 5994 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5995 PetscCall(PetscFree(reqs)); 5996 5997 if (scall == MAT_INITIAL_MATRIX) { 5998 /* put together the new matrix */ 5999 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6000 6001 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6002 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6003 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6004 b_oth->free_a = PETSC_TRUE; 6005 b_oth->free_ij = PETSC_TRUE; 6006 b_oth->nonew = 0; 6007 6008 PetscCall(PetscFree(bufj)); 6009 if (!startsj_s || !bufa_ptr) { 6010 PetscCall(PetscFree2(sstartsj, rstartsj)); 6011 PetscCall(PetscFree(bufa_ptr)); 6012 } else { 6013 *startsj_s = sstartsj; 6014 *startsj_r = rstartsj; 6015 *bufa_ptr = bufa; 6016 } 6017 } else if (scall == MAT_REUSE_MATRIX) { 6018 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6019 } 6020 6021 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6022 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6023 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6024 PetscFunctionReturn(PETSC_SUCCESS); 6025 } 6026 6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6030 #if defined(PETSC_HAVE_MKL_SPARSE) 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6032 #endif 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6035 #if defined(PETSC_HAVE_ELEMENTAL) 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6037 #endif 6038 #if defined(PETSC_HAVE_SCALAPACK) 6039 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6040 #endif 6041 #if defined(PETSC_HAVE_HYPRE) 6042 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6043 #endif 6044 #if defined(PETSC_HAVE_CUDA) 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        [             ]       [     ]         [                 ]
      m [      A      ]  *  n [  B  ]   =   m [        C        ]
        [             ]       [     ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form C = (B' A')' with explicit transposes; Ct holds B'*A' */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Transpose Ct in place into the caller-provided C */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A*B for A dense, B AIJ: size C, make it dense, and install
   the numeric kernel above. 'fill' is unused here (dense result). */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's existing dense (sub)type if it already has one, else inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-table hook for the AB case: validate layouts and install symbolic kernels */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch on product type; only MATPRODUCT_AB is supported for MPIDense*MPIAIJ */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the sorted (possibly repeated) column lists;
       b1/b2 always sit on the FIRST occurrence of the current unique column, and are
       advanced past all of its repeats via jmap1/jmap2 */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer for the merged row */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): valid global column indices are 0..N-1; the '<= N' bound below
         looks one too lenient -- confirm against upstream before tightening */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
   then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[]; relies on PetscCount being signed */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0]; /* slots before the first mapped nonzero get the base offset */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscContainer destructor for the COO assembly scaffolding attached to an MPIAIJ matrix */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocation from COO (i,j) pairs: resets cached communication structures, then
   sorts/splits/ships the COO entries (continues below). coo_i[]/coo_j[] are modified in place. */
PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem; /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Drop any assembly-time caches; the sparsity pattern is being redefined */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
6430 PetscCall(PetscLayoutSetUp(mat->cmap)); 6431 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6432 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6433 PetscCall(MatGetLocalSize(mat, &m, &n)); 6434 PetscCall(MatGetSize(mat, &M, &N)); 6435 6436 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6437 /* entries come first, then local rows, then remote rows. */ 6438 PetscCount n1 = coo_n, *perm1; 6439 PetscInt *i1 = coo_i, *j1 = coo_j; 6440 6441 PetscCall(PetscMalloc1(n1, &perm1)); 6442 for (k = 0; k < n1; k++) perm1[k] = k; 6443 6444 /* Manipulate indices so that entries with negative row or col indices will have smallest 6445 row indices, local entries will have greater but negative row indices, and remote entries 6446 will have positive row indices. 6447 */ 6448 for (k = 0; k < n1; k++) { 6449 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6450 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6451 else { 6452 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6453 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6454 } 6455 } 6456 6457 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6458 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6459 6460 /* Advance k to the first entry we need to take care of */ 6461 for (k = 0; k < n1; k++) 6462 if (i1[k] > PETSC_MIN_INT) break; 6463 PetscInt i1start = k; 6464 6465 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6466 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6467 6468 /* Send remote rows 
to their owner */ 6469 /* Find which rows should be sent to which remote ranks*/ 6470 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6471 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6472 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6473 const PetscInt *ranges; 6474 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6475 6476 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6477 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6478 for (k = rem; k < n1;) { 6479 PetscMPIInt owner; 6480 PetscInt firstRow, lastRow; 6481 6482 /* Locate a row range */ 6483 firstRow = i1[k]; /* first row of this owner */ 6484 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6485 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6486 6487 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6488 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6489 6490 /* All entries in [k,p) belong to this remote owner */ 6491 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6492 PetscMPIInt *sendto2; 6493 PetscInt *nentries2; 6494 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6495 6496 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6497 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6498 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6499 PetscCall(PetscFree2(sendto, nentries2)); 6500 sendto = sendto2; 6501 nentries = nentries2; 6502 maxNsend = maxNsend2; 6503 } 6504 sendto[nsend] = owner; 6505 nentries[nsend] = p - k; 6506 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6507 nsend++; 6508 k = p; 6509 } 6510 6511 /* Build 1st SF to know offsets on remote to send data */ 6512 PetscSF sf1; 6513 PetscInt nroots = 1, nroots2 = 0; 6514 PetscInt nleaves = nsend, nleaves2 = 0; 6515 PetscInt *offsets; 6516 PetscSFNode *iremote; 6517 6518 PetscCall(PetscSFCreate(comm, &sf1)); 6519 PetscCall(PetscMalloc1(nsend, &iremote)); 6520 PetscCall(PetscMalloc1(nsend, &offsets)); 6521 for (k = 0; k < nsend; k++) { 6522 iremote[k].rank = sendto[k]; 6523 iremote[k].index = 0; 6524 nleaves2 += nentries[k]; 6525 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6526 } 6527 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6528 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6529 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6530 PetscCall(PetscSFDestroy(&sf1)); 6531 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6532 6533 /* Build 2nd SF to send remote COOs to their owner */ 6534 PetscSF sf2; 6535 nroots = nroots2; 6536 nleaves = nleaves2; 6537 PetscCall(PetscSFCreate(comm, &sf2)); 6538 
PetscCall(PetscSFSetFromOptions(sf2)); 6539 PetscCall(PetscMalloc1(nleaves, &iremote)); 6540 p = 0; 6541 for (k = 0; k < nsend; k++) { 6542 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6543 for (q = 0; q < nentries[k]; q++, p++) { 6544 iremote[p].rank = sendto[k]; 6545 iremote[p].index = offsets[k] + q; 6546 } 6547 } 6548 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6549 6550 /* Send the remote COOs to their owner */ 6551 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6552 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6553 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6554 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6555 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6556 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6558 6559 PetscCall(PetscFree(offsets)); 6560 PetscCall(PetscFree2(sendto, nentries)); 6561 6562 /* Sort received COOs by row along with the permutation array */ 6563 for (k = 0; k < n2; k++) perm2[k] = k; 6564 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6565 6566 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6567 PetscCount *Cperm1; 6568 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6569 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6570 6571 /* Support for HYPRE matrices, kind of a hack. 
6572 Swap min column with diagonal so that diagonal values will go first */ 6573 PetscBool hypre; 6574 const char *name; 6575 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6576 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6577 if (hypre) { 6578 PetscInt *minj; 6579 PetscBT hasdiag; 6580 6581 PetscCall(PetscBTCreate(m, &hasdiag)); 6582 PetscCall(PetscMalloc1(m, &minj)); 6583 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6584 for (k = i1start; k < rem; k++) { 6585 if (j1[k] < cstart || j1[k] >= cend) continue; 6586 const PetscInt rindex = i1[k] - rstart; 6587 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6588 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6589 } 6590 for (k = 0; k < n2; k++) { 6591 if (j2[k] < cstart || j2[k] >= cend) continue; 6592 const PetscInt rindex = i2[k] - rstart; 6593 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6594 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6595 } 6596 for (k = i1start; k < rem; k++) { 6597 const PetscInt rindex = i1[k] - rstart; 6598 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6599 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6600 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6601 } 6602 for (k = 0; k < n2; k++) { 6603 const PetscInt rindex = i2[k] - rstart; 6604 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6605 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6606 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6607 } 6608 PetscCall(PetscBTDestroy(&hasdiag)); 6609 PetscCall(PetscFree(minj)); 6610 } 6611 6612 /* Split local COOs and received COOs into diag/offdiag portions */ 6613 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6614 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6615 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6616 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6617 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6618 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6619 6620 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6621 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6622 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6623 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6624 6625 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6626 PetscInt *Ai, *Bi; 6627 PetscInt *Aj, *Bj; 6628 6629 PetscCall(PetscMalloc1(m + 1, &Ai)); 6630 PetscCall(PetscMalloc1(m + 1, &Bi)); 6631 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6632 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6633 6634 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6635 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6636 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6637 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6638 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6639 6640 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6641 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6642 6643 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6644 /* expect nonzeros in A/B most likely have local contributing entries */ 6645 PetscInt Annz = Ai[m]; 6646 PetscInt Bnnz = Bi[m]; 6647 PetscCount *Ajmap1_new, *Bjmap1_new; 6648 6649 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6650 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6651 6652 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6653 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6654 
6655 PetscCall(PetscFree(Aimap1)); 6656 PetscCall(PetscFree(Ajmap1)); 6657 PetscCall(PetscFree(Bimap1)); 6658 PetscCall(PetscFree(Bjmap1)); 6659 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6660 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6661 PetscCall(PetscFree(perm1)); 6662 PetscCall(PetscFree3(i2, j2, perm2)); 6663 6664 Ajmap1 = Ajmap1_new; 6665 Bjmap1 = Bjmap1_new; 6666 6667 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6668 if (Annz < Annz1 + Annz2) { 6669 PetscInt *Aj_new; 6670 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6671 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6672 PetscCall(PetscFree(Aj)); 6673 Aj = Aj_new; 6674 } 6675 6676 if (Bnnz < Bnnz1 + Bnnz2) { 6677 PetscInt *Bj_new; 6678 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6679 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6680 PetscCall(PetscFree(Bj)); 6681 Bj = Bj_new; 6682 } 6683 6684 /* Create new submatrices for on-process and off-process coupling */ 6685 PetscScalar *Aa, *Ba; 6686 MatType rtype; 6687 Mat_SeqAIJ *a, *b; 6688 PetscObjectState state; 6689 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6690 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6691 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6692 if (cstart) { 6693 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6694 } 6695 PetscCall(MatDestroy(&mpiaij->A)); 6696 PetscCall(MatDestroy(&mpiaij->B)); 6697 PetscCall(MatGetRootType_Private(mat, &rtype)); 6698 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6699 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6700 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6701 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6702 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6703 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, 
MPI_SUM, PetscObjectComm((PetscObject)mat))); 6704 6705 a = (Mat_SeqAIJ *)mpiaij->A->data; 6706 b = (Mat_SeqAIJ *)mpiaij->B->data; 6707 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6708 a->free_a = b->free_a = PETSC_TRUE; 6709 a->free_ij = b->free_ij = PETSC_TRUE; 6710 6711 /* conversion must happen AFTER multiply setup */ 6712 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6713 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6714 PetscCall(VecDestroy(&mpiaij->lvec)); 6715 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6716 6717 // Put the COO struct in a container and then attach that to the matrix 6718 PetscCall(PetscMalloc1(1, &coo)); 6719 coo->n = coo_n; 6720 coo->sf = sf2; 6721 coo->sendlen = nleaves; 6722 coo->recvlen = nroots; 6723 coo->Annz = Annz; 6724 coo->Bnnz = Bnnz; 6725 coo->Annz2 = Annz2; 6726 coo->Bnnz2 = Bnnz2; 6727 coo->Atot1 = Atot1; 6728 coo->Atot2 = Atot2; 6729 coo->Btot1 = Btot1; 6730 coo->Btot2 = Btot2; 6731 coo->Ajmap1 = Ajmap1; 6732 coo->Aperm1 = Aperm1; 6733 coo->Bjmap1 = Bjmap1; 6734 coo->Bperm1 = Bperm1; 6735 coo->Aimap2 = Aimap2; 6736 coo->Ajmap2 = Ajmap2; 6737 coo->Aperm2 = Aperm2; 6738 coo->Bimap2 = Bimap2; 6739 coo->Bjmap2 = Bjmap2; 6740 coo->Bperm2 = Bperm2; 6741 coo->Cperm1 = Cperm1; 6742 // Allocate in preallocation. 
  // If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  /* Attach the COO struct to the matrix so MatSetValuesCOO_MPIAIJ() can find it later */
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert the numerical values v[] into the matrix following the COO pattern previously
   built by MatSetPreallocationCOO_MPIAIJ() and stored in the "__PETSc_MatCOOStruct_Host"
   container. v[] is ordered as the (coo_i, coo_j) arrays given at preallocation time.
   Remote entries are shipped to their owner with the cached PetscSF while local entries
   are accumulated, overlapping communication with computation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B; /* diag and offdiag blocks */
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  /* Local aliases for the maps built at preallocation:
     *jmap1/*perm1 fold local v[] entries onto nonzeros; *jmap2/*imap2/*perm2 fold received
     remote entries; Cperm1 selects the v[] entries that must be sent away. */
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; /* INSERT_VALUES overwrites, ADD_VALUES accumulates */
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; every nonzero they touch was already
     initialized by the local pass above, so plain += is correct for both insert modes */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ implementation data, creates the
   stash used to cache off-process entries, and registers the type-specific operations,
   conversion routines, and COO assembly entry points. Each registered name here has a
   matching NULL un-registration in MatDestroy_MPIAIJ(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
.
n - This value should be the same as the local size used in creating the 6927 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6928 calculated if `N` is given) For square matrices `n` is almost always `m`. 6929 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6930 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6931 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6932 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6933 . a - matrix values 6934 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6935 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6936 - oa - matrix values 6937 6938 Output Parameter: 6939 . mat - the matrix 6940 6941 Level: advanced 6942 6943 Notes: 6944 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6945 must free the arrays once the matrix has been destroyed and not before. 6946 6947 The `i` and `j` indices are 0 based 6948 6949 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6950 6951 This sets local rows and cannot be used to set off-processor values. 6952 6953 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6954 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6955 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6956 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6957 keep track of the underlying array. 
  Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* preallocation is skipped: the user-supplied CSR arrays back the sequential blocks directly */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap (not copy) the user arrays: diagonal block uses local column indices, off-diagonal uses global */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* only local rows were provided, so assembly needs no stash communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Work data shared by the symbolic and numeric phases of the backend (GPU-friendly)
   MatProduct implementations below */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor attached to the product: releases every buffer, intermediate matrix,
   and the SF owned by a MatMatMPIAIJBACKEND. own[0]/off[0] hold the single
   contiguous index allocations that the own[]/off[] pointer tables point into. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated via the SF with a possibly non-host memory type */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-aware) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    /* host fallback: gather (or plain copy when idx is NULL) from the raw value array */
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: refresh the temporary matrices (unless the
   symbolic phase just did), run the numeric kernels of the intermediate products, then
   scatter their values into C through the COO pattern built by the symbolic phase. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* symbolic-phase values are stale after the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Harvest values of the non-temporary intermediate products into the COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7136 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7137 /* a base offset; type-2: sparse with a local to global map table */ 7138 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7139 7140 MatProductType ptype; 7141 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7142 PetscMPIInt size; 7143 7144 PetscFunctionBegin; 7145 MatCheckProduct(C, 1); 7146 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7147 ptype = product->type; 7148 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7149 ptype = MATPRODUCT_AB; 7150 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7151 } 7152 switch (ptype) { 7153 case MATPRODUCT_AB: 7154 A = product->A; 7155 P = product->B; 7156 m = A->rmap->n; 7157 n = P->cmap->n; 7158 M = A->rmap->N; 7159 N = P->cmap->N; 7160 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7161 break; 7162 case MATPRODUCT_AtB: 7163 P = product->A; 7164 A = product->B; 7165 m = P->cmap->n; 7166 n = A->cmap->n; 7167 M = P->cmap->N; 7168 N = A->cmap->N; 7169 hasoffproc = PETSC_TRUE; 7170 break; 7171 case MATPRODUCT_PtAP: 7172 A = product->A; 7173 P = product->B; 7174 m = P->cmap->n; 7175 n = P->cmap->n; 7176 M = P->cmap->N; 7177 N = P->cmap->N; 7178 hasoffproc = PETSC_TRUE; 7179 break; 7180 default: 7181 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7182 } 7183 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7184 if (size == 1) hasoffproc = PETSC_FALSE; 7185 7186 /* defaults */ 7187 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7188 mp[i] = NULL; 7189 mptmp[i] = PETSC_FALSE; 7190 rmapt[i] = -1; 7191 cmapt[i] = -1; 7192 rmapa[i] = NULL; 7193 cmapa[i] = NULL; 7194 } 7195 7196 /* customization */ 
7197 PetscCall(PetscNew(&mmdata)); 7198 mmdata->reusesym = product->api_user; 7199 if (ptype == MATPRODUCT_AB) { 7200 if (product->api_user) { 7201 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7202 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7203 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7204 PetscOptionsEnd(); 7205 } else { 7206 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7207 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7208 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7209 PetscOptionsEnd(); 7210 } 7211 } else if (ptype == MATPRODUCT_PtAP) { 7212 if (product->api_user) { 7213 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7214 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7215 PetscOptionsEnd(); 7216 } else { 7217 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7218 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7219 PetscOptionsEnd(); 7220 } 7221 } 7222 a = (Mat_MPIAIJ *)A->data; 7223 p = (Mat_MPIAIJ *)P->data; 7224 PetscCall(MatSetSizes(C, m, n, M, N)); 7225 PetscCall(PetscLayoutSetUp(C->rmap)); 7226 PetscCall(PetscLayoutSetUp(C->cmap)); 7227 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7228 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7229 7230 cp = 0; 7231 switch (ptype) { 7232 case MATPRODUCT_AB: /* A * P */ 7233 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7234 7235 /* A_diag * P_local (merged or not) */ 7236 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7237 /* P is product->B */ 7238 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7239 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7240 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7241 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7242 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7243 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7244 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7245 mp[cp]->product->api_user = product->api_user; 7246 PetscCall(MatProductSetFromOptions(mp[cp])); 7247 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7248 PetscCall(ISGetIndices(glob, &globidx)); 7249 rmapt[cp] = 1; 7250 cmapt[cp] = 2; 7251 cmapa[cp] = globidx; 7252 mptmp[cp] = PETSC_FALSE; 7253 cp++; 7254 } else { /* A_diag * P_diag and A_diag * P_off */ 7255 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7256 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7257 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7258 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7259 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7260 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7261 mp[cp]->product->api_user = product->api_user; 7262 PetscCall(MatProductSetFromOptions(mp[cp])); 7263 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7264 rmapt[cp] = 1; 7265 cmapt[cp] = 1; 7266 mptmp[cp] = PETSC_FALSE; 7267 cp++; 7268 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7269 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7270 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7271 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7272 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7273 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7274 mp[cp]->product->api_user = product->api_user; 7275 PetscCall(MatProductSetFromOptions(mp[cp])); 7276 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7277 rmapt[cp] = 1; 7278 cmapt[cp] = 2; 7279 cmapa[cp] = p->garray; 7280 mptmp[cp] = PETSC_FALSE; 7281 cp++; 7282 } 7283 7284 /* A_off * P_other */ 7285 if (mmdata->P_oth) { 7286 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7287 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7288 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7289 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7290 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7291 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7292 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7293 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7294 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7295 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7296 mp[cp]->product->api_user = product->api_user; 7297 PetscCall(MatProductSetFromOptions(mp[cp])); 7298 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7299 rmapt[cp] = 1; 7300 cmapt[cp] = 2; 7301 cmapa[cp] = P_oth_idx; 7302 mptmp[cp] = PETSC_FALSE; 7303 cp++; 7304 } 7305 break; 7306 7307 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7308 /* A is product->B */ 7309 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7310 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7311 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7312 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7313 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7314 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7315 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7316 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7317 mp[cp]->product->api_user = product->api_user; 7318 PetscCall(MatProductSetFromOptions(mp[cp])); 7319 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7320 PetscCall(ISGetIndices(glob, &globidx)); 7321 rmapt[cp] = 2; 7322 rmapa[cp] = globidx; 7323 cmapt[cp] = 2; 7324 cmapa[cp] = globidx; 7325 mptmp[cp] = PETSC_FALSE; 7326 cp++; 7327 } else { 7328 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7329 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7330 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7331 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7332 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7333 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7334 mp[cp]->product->api_user = product->api_user; 7335 PetscCall(MatProductSetFromOptions(mp[cp])); 7336 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7337 PetscCall(ISGetIndices(glob, &globidx)); 7338 rmapt[cp] = 1; 7339 cmapt[cp] = 2; 7340 cmapa[cp] = globidx; 7341 mptmp[cp] = PETSC_FALSE; 7342 cp++; 7343 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7344 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7345 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7346 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7347 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7348 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7349 mp[cp]->product->api_user = product->api_user; 7350 PetscCall(MatProductSetFromOptions(mp[cp])); 7351 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7352 rmapt[cp] = 2; 7353 rmapa[cp] = p->garray; 
7354 cmapt[cp] = 2; 7355 cmapa[cp] = globidx; 7356 mptmp[cp] = PETSC_FALSE; 7357 cp++; 7358 } 7359 break; 7360 case MATPRODUCT_PtAP: 7361 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7362 /* P is product->B */ 7363 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7364 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7365 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7366 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7367 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7368 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7369 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7370 mp[cp]->product->api_user = product->api_user; 7371 PetscCall(MatProductSetFromOptions(mp[cp])); 7372 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7373 PetscCall(ISGetIndices(glob, &globidx)); 7374 rmapt[cp] = 2; 7375 rmapa[cp] = globidx; 7376 cmapt[cp] = 2; 7377 cmapa[cp] = globidx; 7378 mptmp[cp] = PETSC_FALSE; 7379 cp++; 7380 if (mmdata->P_oth) { 7381 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7382 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7383 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7384 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7385 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7386 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7387 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7388 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7389 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7390 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7391 mp[cp]->product->api_user = product->api_user; 7392 PetscCall(MatProductSetFromOptions(mp[cp])); 7393 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7394 
mptmp[cp] = PETSC_TRUE; 7395 cp++; 7396 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7397 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7398 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7399 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7400 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7401 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7402 mp[cp]->product->api_user = product->api_user; 7403 PetscCall(MatProductSetFromOptions(mp[cp])); 7404 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7405 rmapt[cp] = 2; 7406 rmapa[cp] = globidx; 7407 cmapt[cp] = 2; 7408 cmapa[cp] = P_oth_idx; 7409 mptmp[cp] = PETSC_FALSE; 7410 cp++; 7411 } 7412 break; 7413 default: 7414 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7415 } 7416 /* sanity check */ 7417 if (size > 1) 7418 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7419 7420 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7421 for (i = 0; i < cp; i++) { 7422 mmdata->mp[i] = mp[i]; 7423 mmdata->mptmp[i] = mptmp[i]; 7424 } 7425 mmdata->cp = cp; 7426 C->product->data = mmdata; 7427 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7428 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7429 7430 /* memory type */ 7431 mmdata->mtype = PETSC_MEMTYPE_HOST; 7432 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7433 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7434 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7435 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7436 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7437 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7438 
7439 /* prepare coo coordinates for values insertion */ 7440 7441 /* count total nonzeros of those intermediate seqaij Mats 7442 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7443 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7444 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7445 */ 7446 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7447 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7448 if (mptmp[cp]) continue; 7449 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7450 const PetscInt *rmap = rmapa[cp]; 7451 const PetscInt mr = mp[cp]->rmap->n; 7452 const PetscInt rs = C->rmap->rstart; 7453 const PetscInt re = C->rmap->rend; 7454 const PetscInt *ii = mm->i; 7455 for (i = 0; i < mr; i++) { 7456 const PetscInt gr = rmap[i]; 7457 const PetscInt nz = ii[i + 1] - ii[i]; 7458 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7459 else ncoo_oown += nz; /* this row is local */ 7460 } 7461 } else ncoo_d += mm->nz; 7462 } 7463 7464 /* 7465 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7466 7467 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7468 7469 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7470 7471 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7472 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7473 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7474 7475 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7476 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7477 */ 7478 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7479 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7480 7481 /* gather (i,j) of nonzeros inserted by remote procs */ 7482 if (hasoffproc) { 7483 PetscSF msf; 7484 PetscInt ncoo2, *coo_i2, *coo_j2; 7485 7486 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7487 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7488 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7489 7490 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7491 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7492 PetscInt *idxoff = mmdata->off[cp]; 7493 PetscInt *idxown = mmdata->own[cp]; 7494 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7495 const PetscInt *rmap = rmapa[cp]; 7496 const PetscInt *cmap = cmapa[cp]; 7497 const PetscInt *ii = mm->i; 7498 PetscInt *coi = coo_i + ncoo_o; 7499 PetscInt *coj = coo_j + ncoo_o; 7500 const PetscInt mr = mp[cp]->rmap->n; 7501 const PetscInt rs = C->rmap->rstart; 7502 const PetscInt re = C->rmap->rend; 7503 const PetscInt cs = C->cmap->rstart; 7504 for (i = 0; i < mr; i++) { 7505 const PetscInt *jj = mm->j + ii[i]; 7506 const PetscInt gr = rmap[i]; 7507 const PetscInt nz = ii[i + 1] - ii[i]; 7508 if (gr < rs || gr >= re) { /* this is an offproc row */ 7509 for (j = ii[i]; j < ii[i + 1]; j++) { 7510 *coi++ = gr; 7511 *idxoff++ = j; 7512 } 7513 if (!cmapt[cp]) { /* already global */ 7514 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7515 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7516 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7517 } else { /* offdiag */ 7518 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7519 } 7520 ncoo_o += nz; 7521 } else { /* this is a local row */ 7522 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7523 } 7524 } 7525 } 7526 mmdata->off[cp + 1] = idxoff; 7527 mmdata->own[cp + 1] = idxown; 7528 } 7529 7530 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7531 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7532 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7533 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7534 ncoo = ncoo_d + ncoo_oown + ncoo2; 7535 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7536 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7537 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7538 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7539 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7540 PetscCall(PetscFree2(coo_i, coo_j)); 7541 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7542 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7543 coo_i = coo_i2; 7544 coo_j = coo_j2; 7545 } else { /* no offproc values insertion */ 7546 ncoo = ncoo_d; 7547 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7548 7549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7550 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7551 PetscCall(PetscSFSetUp(mmdata->sf)); 7552 } 7553 mmdata->hasoffproc = hasoffproc; 7554 7555 /* gather (i,j) of nonzeros inserted locally */ 7556 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7557 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7558 PetscInt *coi = coo_i + ncoo_d; 7559 PetscInt *coj = coo_j + ncoo_d; 7560 const PetscInt *jj = mm->j; 7561 const PetscInt *ii = mm->i; 7562 const PetscInt *cmap = 
cmapa[cp]; 7563 const PetscInt *rmap = rmapa[cp]; 7564 const PetscInt mr = mp[cp]->rmap->n; 7565 const PetscInt rs = C->rmap->rstart; 7566 const PetscInt re = C->rmap->rend; 7567 const PetscInt cs = C->cmap->rstart; 7568 7569 if (mptmp[cp]) continue; 7570 if (rmapt[cp] == 1) { /* consecutive rows */ 7571 /* fill coo_i */ 7572 for (i = 0; i < mr; i++) { 7573 const PetscInt gr = i + rs; 7574 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7575 } 7576 /* fill coo_j */ 7577 if (!cmapt[cp]) { /* type-0, already global */ 7578 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7579 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7580 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7581 } else { /* type-2, local to global for sparse columns */ 7582 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7583 } 7584 ncoo_d += mm->nz; 7585 } else if (rmapt[cp] == 2) { /* sparse rows */ 7586 for (i = 0; i < mr; i++) { 7587 const PetscInt *jj = mm->j + ii[i]; 7588 const PetscInt gr = rmap[i]; 7589 const PetscInt nz = ii[i + 1] - ii[i]; 7590 if (gr >= rs && gr < re) { /* local rows */ 7591 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7592 if (!cmapt[cp]) { /* type-0, already global */ 7593 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7594 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7595 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7596 } else { /* type-2, local to global for sparse columns */ 7597 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7598 } 7599 ncoo_d += nz; 7600 } 7601 } 7602 } 7603 } 7604 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7605 PetscCall(ISDestroy(&glob)); 7606 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7607 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7608 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7609 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7610 7611 /* preallocate with COO data */ 7612 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7613 PetscCall(PetscFree2(coo_i, coo_j)); 7614 PetscFunctionReturn(PETSC_SUCCESS); 7615 } 7616 7617 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7618 { 7619 Mat_Product *product = mat->product; 7620 #if defined(PETSC_HAVE_DEVICE) 7621 PetscBool match = PETSC_FALSE; 7622 PetscBool usecpu = PETSC_FALSE; 7623 #else 7624 PetscBool match = PETSC_TRUE; 7625 #endif 7626 7627 PetscFunctionBegin; 7628 MatCheckProduct(mat, 1); 7629 #if defined(PETSC_HAVE_DEVICE) 7630 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7631 if (match) { /* we can always fallback to the CPU if requested */ 7632 switch (product->type) { 7633 case MATPRODUCT_AB: 7634 if (product->api_user) { 7635 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7636 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7637 PetscOptionsEnd(); 7638 } else { 7639 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7640 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7641 PetscOptionsEnd(); 7642 } 7643 break; 7644 case MATPRODUCT_AtB: 7645 if (product->api_user) { 7646 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7647 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7648 PetscOptionsEnd(); 7649 } else { 7650 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7651 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7652 PetscOptionsEnd(); 7653 } 7654 break; 7655 case MATPRODUCT_PtAP: 7656 if (product->api_user) { 7657 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7658 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7659 PetscOptionsEnd(); 7660 } else { 7661 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7662 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7663 PetscOptionsEnd(); 7664 } 7665 break; 7666 default: 7667 break; 7668 } 7669 match = (PetscBool)!usecpu; 7670 } 7671 #endif 7672 if (match) { 7673 switch (product->type) { 7674 case MATPRODUCT_AB: 7675 case MATPRODUCT_AtB: 7676 case MATPRODUCT_PtAP: 7677 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7678 break; 7679 default: 7680 break; 7681 } 7682 } 7683 /* fallback to MPIAIJ ops */ 7684 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7685 PetscFunctionReturn(PETSC_SUCCESS); 7686 } 7687 7688 /* 7689 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7690 7691 n - the number of block indices in cc[] 7692 cc - the block indices (must be large enough to contain the indices) 7693 */ 7694 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7695 { 7696 PetscInt cnt = -1, nidx, j; 7697 const PetscInt *idx; 7698 7699 PetscFunctionBegin; 7700 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7701 if (nidx) { 7702 cnt = 0; 7703 cc[cnt] = idx[0] / bs; 7704 for (j = 1; j < nidx; j++) { 7705 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7706 } 7707 } 7708 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7709 *n = cnt + 1; 7710 PetscFunctionReturn(PETSC_SUCCESS); 7711 } 7712 7713 /* 7714 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7715 7716 ncollapsed - the number of block indices 7717 collapsed - the block indices (must be large enough to contain the indices) 7718 */ 7719 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7720 { 7721 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7722 7723 PetscFunctionBegin; 7724 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7725 for (i = start + 1; i < start + bs; i++) { 7726 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7727 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7728 cprevtmp = cprev; 7729 cprev = merged; 7730 merged = cprevtmp; 7731 } 7732 *ncollapsed = nprev; 7733 if (collapsed) *collapsed = cprev; 7734 PetscFunctionReturn(PETSC_SUCCESS); 7735 } 7736 7737 /* 7738 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7739 7740 Input Parameter: 7741 . Amat - matrix 7742 - symmetrize - make the result symmetric 7743 + scale - scale with diagonal 7744 7745 Output Parameter: 7746 . 
a_Gmat - output scalar graph >= 0 7747 7748 */ 7749 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7750 { 7751 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7752 MPI_Comm comm; 7753 Mat Gmat; 7754 PetscBool ismpiaij, isseqaij; 7755 Mat a, b, c; 7756 MatType jtype; 7757 7758 PetscFunctionBegin; 7759 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7760 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7761 PetscCall(MatGetSize(Amat, &MM, &NN)); 7762 PetscCall(MatGetBlockSize(Amat, &bs)); 7763 nloc = (Iend - Istart) / bs; 7764 7765 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7766 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7767 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7768 7769 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7770 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7771 implementation */ 7772 if (bs > 1) { 7773 PetscCall(MatGetType(Amat, &jtype)); 7774 PetscCall(MatCreate(comm, &Gmat)); 7775 PetscCall(MatSetType(Gmat, jtype)); 7776 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7777 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7778 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7779 PetscInt *d_nnz, *o_nnz; 7780 MatScalar *aa, val, *AA; 7781 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7782 if (isseqaij) { 7783 a = Amat; 7784 b = NULL; 7785 } else { 7786 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7787 a = d->A; 7788 b = d->B; 7789 } 7790 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7791 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7792 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7793 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7794 const PetscInt *cols1, *cols2; 7795 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7796 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7797 nnz[brow / bs] = nc2 / bs; 7798 if (nc2 % bs) ok = 0; 7799 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7800 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7801 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7802 if (nc1 != nc2) ok = 0; 7803 else { 7804 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7805 if (cols1[jj] != cols2[jj]) ok = 0; 7806 if (cols1[jj] % bs != jj % bs) ok = 0; 7807 } 7808 } 7809 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7810 } 7811 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7812 if (!ok) { 7813 PetscCall(PetscFree2(d_nnz, o_nnz)); 7814 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7815 goto old_bs; 7816 } 7817 } 7818 } 7819 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7820 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7821 PetscCall(PetscFree2(d_nnz, o_nnz)); 7822 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7823 // diag 7824 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7825 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7826 ai = aseq->i; 7827 n = ai[brow + 1] - ai[brow]; 7828 aj = aseq->j + ai[brow]; 7829 for (int k = 0; k < n; k += bs) { // block columns 7830 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7831 val = 0; 7832 if (index_size == 0) { 7833 for (int ii = 0; ii < bs; ii++) { // rows in block 7834 aa = aseq->a + ai[brow + ii] + k; 7835 for (int jj = 0; jj < bs; jj++) { // columns in block 7836 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7837 } 7838 } 7839 } else { // use (index,index) value if provided 7840 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7841 int ii = index[iii]; 7842 aa = aseq->a + ai[brow + ii] + k; 7843 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7844 int jj = index[jjj]; 7845 val += PetscAbs(PetscRealPart(aa[jj])); 7846 } 7847 } 7848 } 7849 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7850 AA[k / bs] = val; 7851 } 7852 grow = Istart / bs + brow / bs; 7853 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7854 } 7855 // off-diag 7856 if (ismpiaij) { 7857 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7858 const PetscScalar *vals; 7859 const PetscInt *cols, *garray = aij->garray; 7860 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7861 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7862 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7863 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7864 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7865 AA[k / bs] = 0; 7866 AJ[cidx] = garray[cols[k]] / bs; 7867 } 7868 nc = ncols / bs; 7869 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7870 if (index_size == 0) { 7871 for (int ii = 0; ii < bs; ii++) { // rows in block 7872 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7873 for (int k = 0; k < ncols; k += bs) { 7874 for (int jj = 0; jj < bs; jj++) { // cols in block 7875 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7876 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7877 } 7878 } 7879 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7880 } 7881 } else { // use (index,index) value if provided 7882 for (int iii = 0; iii < index_size; iii++) { // rows in block 7883 int ii = index[iii]; 7884 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7885 for (int k = 0; k < ncols; k += bs) { 7886 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7887 int jj = index[jjj]; 7888 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7889 } 7890 } 7891 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7892 } 7893 } 7894 grow = Istart / bs + brow / bs; 7895 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7896 } 7897 } 7898 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7899 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7900 PetscCall(PetscFree2(AA, AJ)); 7901 } else { 7902 const PetscScalar *vals; 7903 const PetscInt *idx; 7904 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7905 old_bs: 7906 /* 7907 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7908 */ 7909 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7910 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7911 if (isseqaij) { 7912 PetscInt max_d_nnz; 7913 /* 7914 Determine exact preallocation count for (sequential) scalar matrix 7915 */ 7916 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7917 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7918 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7919 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7920 PetscCall(PetscFree3(w0, w1, w2)); 7921 } else if (ismpiaij) { 7922 Mat Daij, Oaij; 7923 const PetscInt *garray; 7924 PetscInt max_d_nnz; 7925 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7926 /* 7927 Determine exact preallocation count for diagonal block portion of scalar matrix 7928 */ 7929 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7930 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7931 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7932 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7933 PetscCall(PetscFree3(w0, w1, w2)); 7934 /* 7935 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7936 */ 7937 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7938 o_nnz[jj] = 0; 7939 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7940 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7941 o_nnz[jj] += ncols; 7942 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7943 } 7944 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7945 } 7946 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7947 /* get scalar copy (norms) of matrix */ 7948 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7949 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7950 PetscCall(PetscFree2(d_nnz, o_nnz)); 7951 for (Ii = Istart; Ii < Iend; Ii++) { 7952 PetscInt dest_row = Ii / bs; 7953 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7954 for (jj = 0; jj < ncols; jj++) { 7955 PetscInt dest_col = idx[jj] / bs; 7956 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7957 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7958 } 7959 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7960 } 7961 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7962 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7963 } 7964 } else { 7965 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7966 else { 7967 Gmat = Amat; 7968 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7969 } 7970 if (isseqaij) { 7971 a = Gmat; 7972 b = NULL; 7973 } else { 7974 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7975 a = d->A; 7976 b = d->B; 7977 } 7978 if (filter >= 0 || scale) { 7979 /* take absolute value of each entry */ 7980 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7981 MatInfo info; 7982 PetscScalar *avals; 7983 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7984 PetscCall(MatSeqAIJGetArray(c, &avals)); 7985 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 7986 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7987 } 7988 } 7989 } 7990 if (symmetrize) { 7991 PetscBool isset, issym; 7992 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7993 if (!isset || !issym) { 7994 Mat matTrans; 7995 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7996 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7997 PetscCall(MatDestroy(&matTrans)); 7998 } 7999 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8000 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8001 if (scale) { 8002 /* scale c for all diagonal values = 1 or -1 */ 8003 Vec diag; 8004 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8005 PetscCall(MatGetDiagonal(Gmat, diag)); 8006 PetscCall(VecReciprocal(diag)); 8007 PetscCall(VecSqrtAbs(diag)); 8008 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8009 PetscCall(VecDestroy(&diag)); 8010 } 8011 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8012 8013 if (filter >= 0) { 8014 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8015 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8016 } 8017 *a_Gmat = Gmat; 8018 PetscFunctionReturn(PETSC_SUCCESS); 8019 } 8020 8021 /* 8022 Special version for direct calls from Fortran 8023 */ 8024 #include <petsc/private/fortranimpl.h> 8025 8026 /* Change these macros so can be used in void function */ 8027 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8028 #undef PetscCall 8029 #define PetscCall(...) \ 8030 do { \ 8031 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8032 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8033 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8034 return; \ 8035 } \ 8036 } while (0) 8037 8038 #undef SETERRQ 8039 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Fortran name mangling for the direct-call entry point below */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran direct-call equivalent of MatSetValues() for MPIAIJ matrices.

  All scalar arguments arrive by reference (Fortran calling convention) and are dereferenced
  on entry. Because this is a void function, errors cannot be returned; the PetscCall() and
  SETERRQ() macros redefined above store the error code in *_ierr and return immediately.

  Input Parameters:
    mmat  - the MPIAIJ matrix
    mm,im - number of rows and their global indices
    mn,in - number of columns and their global indices
    v     - the values (row- or column-oriented per aij->roworiented)
    maddv - INSERT_VALUES or ADD_VALUES

  Output Parameter:
    _ierr - error code (0 on success)
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; later calls must not mix INSERT and ADD */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro (MatSetValues_SeqAIJ_A_Private /
       MatSetValues_SeqAIJ_B_Private expand against these exact names) */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch variables consumed by the insertion macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index means skip this row */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned locally: set up binary-search state for both the diagonal (A)
           and off-diagonal (B) blocks of this local row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* Optionally drop explicit zeros, but never a diagonal entry */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column index means skip */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Column falls in the off-diagonal block */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              /* colmap stores local index + 1; after the decrement, col < 0 means the
                 column is absent from B's current nonzero pattern */
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* New off-diagonal column and new nonzeros are allowed: disassemble,
                   after which B is addressed with global column numbers */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B uses global column numbers */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another process: stash the values for communication at assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ