1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
  `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also
  automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 
PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ 
*)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 288 PetscFunctionBegin; 289 PetscCall(MatGetSize(A, &m, &n)); 290 PetscCall(PetscCalloc1(n, &work)); 291 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 292 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 294 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 295 if (type == NORM_2) { 296 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 297 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 298 } else if (type == NORM_1) { 299 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 300 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 301 } else if (type == NORM_INFINITY) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), 
work[garray[b_aij->j[i]]]); 304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 307 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 310 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 311 if (type == NORM_INFINITY) { 312 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 313 } else { 314 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 315 } 316 PetscCall(PetscFree(work)); 317 if (type == NORM_2) { 318 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 319 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 320 for (i = 0; i < n; i++) reductions[i] /= m; 321 } 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 326 { 327 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 328 IS sis, gis; 329 const PetscInt *isis, *igis; 330 PetscInt n, *iis, nsis, ngis, rstart, i; 331 332 PetscFunctionBegin; 333 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 334 PetscCall(MatFindNonzeroRows(a->B, &gis)); 335 PetscCall(ISGetSize(gis, &ngis)); 336 PetscCall(ISGetSize(sis, &nsis)); 337 PetscCall(ISGetIndices(sis, &isis)); 338 PetscCall(ISGetIndices(gis, &igis)); 339 340 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 341 PetscCall(PetscArraycpy(iis, igis, ngis)); 342 
PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 343 n = ngis + nsis; 344 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 345 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 346 for (i = 0; i < n; i++) iis[i] += rstart; 347 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 348 349 PetscCall(ISRestoreIndices(sis, &isis)); 350 PetscCall(ISRestoreIndices(gis, &igis)); 351 PetscCall(ISDestroy(&sis)); 352 PetscCall(ISDestroy(&gis)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /* 357 Local utility routine that creates a mapping from the global column 358 number to the local number in the off-diagonal part of the local 359 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 360 a slightly higher hash table cost; without it it is not scalable (each processor 361 has an order N integer array but is fast to access. 362 */ 363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 364 { 365 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 366 PetscInt n = aij->B->cmap->n, i; 367 368 PetscFunctionBegin; 369 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 370 #if defined(PETSC_USE_CTABLE) 371 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 372 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 373 #else 374 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 375 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 376 #endif 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 381 do { \ 382 if (col <= lastcol1) low1 = 0; \ 383 else high1 = nrow1; \ 384 lastcol1 = col; \ 385 while (high1 - low1 > 5) { \ 386 t = (low1 + high1) / 2; \ 387 if (rp1[t] > col) high1 = t; \ 388 else low1 = t; \ 389 } \ 390 for (_i = low1; _i < high1; _i++) { \ 391 if (rp1[_i] > col) break; \ 392 if (rp1[_i] == col) { \ 393 if (addv == 
ADD_VALUES) { \ 394 ap1[_i] += value; \ 395 /* Not sure LogFlops will slow dow the code or not */ \ 396 (void)PetscLogFlops(1.0); \ 397 } else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries && row != col) { \ 402 low1 = 0; \ 403 high1 = nrow1; \ 404 goto a_noinsert; \ 405 } \ 406 if (nonew == 1) { \ 407 low1 = 0; \ 408 high1 = nrow1; \ 409 goto a_noinsert; \ 410 } \ 411 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 412 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 413 N = nrow1++ - 1; \ 414 a->nz++; \ 415 high1++; \ 416 /* shift up all the later entries in this row */ \ 417 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 418 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 419 rp1[_i] = col; \ 420 ap1[_i] = value; \ 421 A->nonzerostate++; \ 422 a_noinsert:; \ 423 ailen[row] = nrow1; \ 424 } while (0) 425 426 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 427 do { \ 428 if (col <= lastcol2) low2 = 0; \ 429 else high2 = nrow2; \ 430 lastcol2 = col; \ 431 while (high2 - low2 > 5) { \ 432 t = (low2 + high2) / 2; \ 433 if (rp2[t] > col) high2 = t; \ 434 else low2 = t; \ 435 } \ 436 for (_i = low2; _i < high2; _i++) { \ 437 if (rp2[_i] > col) break; \ 438 if (rp2[_i] == col) { \ 439 if (addv == ADD_VALUES) { \ 440 ap2[_i] += value; \ 441 (void)PetscLogFlops(1.0); \ 442 } else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) { \ 447 low2 = 0; \ 448 high2 = nrow2; \ 449 goto b_noinsert; \ 450 } \ 451 if (nonew == 1) { \ 452 low2 = 0; \ 453 high2 = nrow2; \ 454 goto b_noinsert; \ 455 } \ 456 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" 
PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 457 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 458 N = nrow2++ - 1; \ 459 b->nz++; \ 460 high2++; \ 461 /* shift up all the later entries in this row */ \ 462 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 463 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 464 rp2[_i] = col; \ 465 ap2[_i] = value; \ 466 B->nonzerostate++; \ 467 b_noinsert:; \ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, 
PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 555 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 556 if (in[j] >= cstart && in[j] < cend) { 557 col = in[j] - cstart; 558 nonew = a->nonew; 559 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 560 } else if (in[j] < 0) { 561 continue; 562 } else { 563 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 564 if (mat->was_assembled) { 565 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 566 #if defined(PETSC_USE_CTABLE) 567 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 568 col--; 569 #else 570 col = aij->colmap[in[j]] - 1; 571 #endif 572 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 573 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 574 col = in[j]; 575 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 576 B = aij->B; 577 b = (Mat_SeqAIJ *)B->data; 578 bimax = b->imax; 579 bi = b->i; 580 bilen = b->ilen; 581 bj = b->j; 582 ba = b->a; 583 rp2 = bj + bi[row]; 584 ap2 = ba + bi[row]; 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 bm = aij->B->rmap->n; 590 ba = b->a; 591 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 592 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 593 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 594 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 595 } 596 } else col = in[j]; 597 nonew = b->nonew; 598 
MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 599 } 600 } 601 } else { 602 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } else { 608 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 609 } 610 } 611 } 612 } 613 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 614 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 620 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 621 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
622 */ 623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 624 { 625 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 626 Mat A = aij->A; /* diagonal part of the matrix */ 627 Mat B = aij->B; /* off-diagonal part of the matrix */ 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 629 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 630 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 631 PetscInt *ailen = a->ilen, *aj = a->j; 632 PetscInt *bilen = b->ilen, *bj = b->j; 633 PetscInt am = aij->A->rmap->n, j; 634 PetscInt diag_so_far = 0, dnz; 635 PetscInt offd_so_far = 0, onz; 636 637 PetscFunctionBegin; 638 /* Iterate over all rows of the matrix */ 639 for (j = 0; j < am; j++) { 640 dnz = onz = 0; 641 /* Iterate over all non-zero columns of the current row */ 642 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 643 /* If column is in the diagonal */ 644 if (mat_j[col] >= cstart && mat_j[col] < cend) { 645 aj[diag_so_far++] = mat_j[col] - cstart; 646 dnz++; 647 } else { /* off-diagonal entries */ 648 bj[offd_so_far++] = mat_j[col]; 649 onz++; 650 } 651 } 652 ailen[j] = dnz; 653 bilen[j] = onz; 654 } 655 PetscFunctionReturn(PETSC_SUCCESS); 656 } 657 658 /* 659 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 660 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 661 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 662 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 663 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
664 */ 665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 666 { 667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 668 Mat A = aij->A; /* diagonal part of the matrix */ 669 Mat B = aij->B; /* off-diagonal part of the matrix */ 670 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 672 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 673 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 674 PetscInt *ailen = a->ilen, *aj = a->j; 675 PetscInt *bilen = b->ilen, *bj = b->j; 676 PetscInt am = aij->A->rmap->n, j; 677 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. 
*/ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const 
PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 PetscBool cong; 849 850 PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + 
A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscMPIInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 const PetscInt idx = rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this 
row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if pattern was allowed to be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and 
xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = 
Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1105 PetscCall(MatGetSize(Amat, &M, &N)); 1106 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1107 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1108 for (i = 0; i < first; i++) notme[i] = i; 1109 for (i = last; i < M; i++) notme[i - last + first] = i; 1110 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1111 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1112 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1113 Aoff = Aoffs[0]; 1114 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1115 Boff = Boffs[0]; 1116 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1117 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1118 PetscCall(MatDestroyMatrices(1, &Boffs)); 1119 PetscCall(ISDestroy(&Me)); 1120 PetscCall(ISDestroy(&Notme)); 1121 PetscCall(PetscFree(notme)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1128 1129 PetscFunctionBegin; 1130 /* do nondiagonal part */ 1131 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1132 /* do local part */ 1133 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1134 /* add partial 
results together */ 1135 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1136 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /* 1141 This only works correctly for square matrices where the subblock A->A is the 1142 diagonal block 1143 */ 1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1150 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1151 PetscCall(MatGetDiagonal(a->A, v)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatScale(a->A, aa)); 1161 PetscCall(MatScale(a->B, aa)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1170 const PetscInt *garray = aij->garray; 1171 const PetscScalar *aa, *ba; 1172 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1173 PetscInt64 nz, hnz; 1174 PetscInt *rowlens; 1175 PetscInt *colidxs; 1176 PetscScalar *matvals; 1177 PetscMPIInt rank; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 
header[2] = N; 1193 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1194 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1195 if (rank == 0) { 1196 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 
PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 
PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" 
PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 
/*
  Builds a new MPIAIJ matrix *B that is A with its rows permuted by rowp and
  its columns permuted by colp.

  A    - the MPIAIJ matrix to permute
  rowp - row permutation; its indices are used as the leaves of a PetscSF over A's row layout
  colp - column permutation, used the same way over A's column layout
  B    - receives the newly created, fully assembled permuted matrix

  The permutations are inverted with PetscSF reductions so that each rank
  learns the destination of the rows and columns it owns; the destinations of
  the ghost columns are then fetched with a broadcast. Per-row diagonal and
  off-diagonal counts are computed for the destination layout, sent to the
  destination rows, and used to preallocate the result before the values are
  inserted with MatSetValues().
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is shared by the row and the column inversion, hence sized max(m, n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, for each of my rows, how many entries land in the diagonal / off-diagonal block after permutation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Move the counts along the row SF into tdnnz/tonnz, which are used to preallocate Aperm */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is initialized to NULL and never assigned in this function, so this destroy never runs */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Reports the ghost (off-process) columns of an MPIAIJ matrix.

  nghosts - receives the number of columns of the off-diagonal block B
  ghosts  - optional; when non-NULL receives a pointer to the matrix's own
            garray (not a copy)
*/
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 
1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if (idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 1804 PetscFunctionBegin; 1805 if (aij->size == 1) { 1806 PetscCall(MatNorm(aij->A, type, norm)); 1807 } else { 1808 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1809 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1810 if (type == NORM_FROBENIUS) { 1811 v = amata; 1812 for (i = 0; i < amat->nz; i++) { 1813 sum += PetscRealPart(PetscConj(*v) * (*v)); 1814 v++; 1815 } 1816 v = bmata; 1817 for (i = 0; i < bmat->nz; i++) { 1818 sum += PetscRealPart(PetscConj(*v) * (*v)); 1819 v++; 1820 } 1821 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1822 *norm = PetscSqrtReal(*norm); 1823 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1824 } else if (type == NORM_1) { /* max column norm */ 1825 PetscReal *tmp, *tmp2; 1826 PetscInt *jj, *garray = aij->garray; 1827 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1828 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1829 *norm = 0.0; 1830 v = amata; 1831 jj = amat->j; 1832 for (j = 0; j < amat->nz; j++) { 1833 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1834 v++; 1835 } 1836 v = bmata; 1837 jj = bmat->j; 1838 for (j = 0; j < bmat->nz; j++) { 1839 
tmp[garray[*jj++]] += PetscAbsScalar(*v); 1840 v++; 1841 } 1842 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1843 for (j = 0; j < mat->cmap->N; j++) { 1844 if (tmp2[j] > *norm) *norm = tmp2[j]; 1845 } 1846 PetscCall(PetscFree(tmp)); 1847 PetscCall(PetscFree(tmp2)); 1848 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1849 } else if (type == NORM_INFINITY) { /* max row norm */ 1850 PetscReal ntemp = 0.0; 1851 for (j = 0; j < aij->A->rmap->n; j++) { 1852 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1853 sum = 0.0; 1854 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1855 sum += PetscAbsScalar(*v); 1856 v++; 1857 } 1858 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1859 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1860 sum += PetscAbsScalar(*v); 1861 v++; 1862 } 1863 if (sum > ntemp) ntemp = sum; 1864 } 1865 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1866 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1867 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1868 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1869 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1870 } 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1875 { 1876 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1877 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1878 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1879 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1880 Mat B, A_diag, *B_diag; 1881 const MatScalar *pbv, *bv; 1882 1883 PetscFunctionBegin; 1884 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1885 ma = A->rmap->n; 1886 na = 
A->cmap->n; 1887 mb = a->B->rmap->n; 1888 nb = a->B->cmap->n; 1889 ai = Aloc->i; 1890 aj = Aloc->j; 1891 bi = Bloc->i; 1892 bj = Bloc->j; 1893 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1894 PetscInt *d_nnz, *g_nnz, *o_nnz; 1895 PetscSFNode *oloc; 1896 PETSC_UNUSED PetscSF sf; 1897 1898 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1899 /* compute d_nnz for preallocation */ 1900 PetscCall(PetscArrayzero(d_nnz, na)); 1901 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1902 /* compute local off-diagonal contributions */ 1903 PetscCall(PetscArrayzero(g_nnz, nb)); 1904 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1905 /* map those to global */ 1906 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1907 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1908 PetscCall(PetscSFSetFromOptions(sf)); 1909 PetscCall(PetscArrayzero(o_nnz, na)); 1910 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1911 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1912 PetscCall(PetscSFDestroy(&sf)); 1913 1914 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1915 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1916 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1917 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1918 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1919 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1920 } else { 1921 B = *matout; 1922 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1923 } 1924 1925 b = (Mat_MPIAIJ *)B->data; 1926 A_diag = a->A; 1927 B_diag = &b->A; 1928 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1929 A_diag_ncol = A_diag->cmap->N; 1930 B_diag_ilen = sub_B_diag->ilen; 1931 B_diag_i = sub_B_diag->i; 1932 1933 /* Set ilen for diagonal of B */ 1934 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1935 1936 /* 
Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1937 very quickly (=without using MatSetValues), because all writes are local. */ 1938 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1939 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1940 1941 /* copy over the B part */ 1942 PetscCall(PetscMalloc1(bi[mb], &cols)); 1943 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1944 pbv = bv; 1945 row = A->rmap->rstart; 1946 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1947 cols_tmp = cols; 1948 for (i = 0; i < mb; i++) { 1949 ncol = bi[i + 1] - bi[i]; 1950 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1951 row++; 1952 if (pbv) pbv += ncol; 1953 if (cols_tmp) cols_tmp += ncol; 1954 } 1955 PetscCall(PetscFree(cols)); 1956 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1957 1958 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1959 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1960 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1961 *matout = B; 1962 } else { 1963 PetscCall(MatHeaderMerge(A, &B)); 1964 } 1965 PetscFunctionReturn(PETSC_SUCCESS); 1966 } 1967 1968 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1969 { 1970 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1971 Mat a = aij->A, b = aij->B; 1972 PetscInt s1, s2, s3; 1973 1974 PetscFunctionBegin; 1975 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1976 if (rr) { 1977 PetscCall(VecGetLocalSize(rr, &s1)); 1978 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1979 /* Overlap communication with computation. 
*/ 1980 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1981 } 1982 if (ll) { 1983 PetscCall(VecGetLocalSize(ll, &s1)); 1984 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1985 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1986 } 1987 /* scale the diagonal block */ 1988 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1989 1990 if (rr) { 1991 /* Do a scatter end and then right scale the off-diagonal block */ 1992 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1993 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1994 } 1995 PetscFunctionReturn(PETSC_SUCCESS); 1996 } 1997 1998 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1999 { 2000 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2001 2002 PetscFunctionBegin; 2003 PetscCall(MatSetUnfactored(a->A)); 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2008 { 2009 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2010 Mat a, b, c, d; 2011 PetscBool flg; 2012 2013 PetscFunctionBegin; 2014 a = matA->A; 2015 b = matA->B; 2016 c = matB->A; 2017 d = matB->B; 2018 2019 PetscCall(MatEqual(a, c, &flg)); 2020 if (flg) PetscCall(MatEqual(b, d, &flg)); 2021 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2022 PetscFunctionReturn(PETSC_SUCCESS); 2023 } 2024 2025 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2026 { 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2028 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2029 2030 PetscFunctionBegin; 2031 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2032 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2033 /* because of the column compression in the off-processor part of the matrix a->B, 2034 the number of columns in a->B and b->B may be different, hence we cannot call 2035 the MatCopy() directly on the two parts. If need be, we can provide a more 2036 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2037 then copying the submatrices */ 2038 PetscCall(MatCopy_Basic(A, B, str)); 2039 } else { 2040 PetscCall(MatCopy(a->A, b->A, str)); 2041 PetscCall(MatCopy(a->B, b->B, str)); 2042 } 2043 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 /* 2048 Computes the number of nonzeros per row needed for preallocation when X and Y 2049 have different nonzero structure. 2050 */ 2051 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2052 { 2053 PetscInt i, j, k, nzx, nzy; 2054 2055 PetscFunctionBegin; 2056 /* Set the number of nonzeros in the new matrix */ 2057 for (i = 0; i < m; i++) { 2058 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2059 nzx = xi[i + 1] - xi[i]; 2060 nzy = yi[i + 1] - yi[i]; 2061 nnz[i] = 0; 2062 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2063 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2064 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2065 nnz[i]++; 2066 } 2067 for (; k < nzy; k++) nnz[i]++; 2068 } 2069 PetscFunctionReturn(PETSC_SUCCESS); 2070 } 2071 2072 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2073 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2074 { 
2075 PetscInt m = Y->rmap->N; 2076 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2077 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2078 2079 PetscFunctionBegin; 2080 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2081 PetscFunctionReturn(PETSC_SUCCESS); 2082 } 2083 2084 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2085 { 2086 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 if (str == SAME_NONZERO_PATTERN) { 2090 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2091 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2092 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2093 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2094 } else { 2095 Mat B; 2096 PetscInt *nnz_d, *nnz_o; 2097 2098 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2099 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2100 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2101 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2102 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2103 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2104 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2105 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2106 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2107 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2108 PetscCall(MatHeaderMerge(Y, &B)); 2109 PetscCall(PetscFree(nnz_d)); 2110 PetscCall(PetscFree(nnz_o)); 2111 } 2112 PetscFunctionReturn(PETSC_SUCCESS); 2113 } 2114 2115 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2116 2117 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2118 { 2119 PetscFunctionBegin; 2120 if (PetscDefined(USE_COMPLEX)) { 2121 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2122 2123 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2124 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2125 } 2126 
PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2130 { 2131 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2132 2133 PetscFunctionBegin; 2134 PetscCall(MatRealPart(a->A)); 2135 PetscCall(MatRealPart(a->B)); 2136 PetscFunctionReturn(PETSC_SUCCESS); 2137 } 2138 2139 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2140 { 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2142 2143 PetscFunctionBegin; 2144 PetscCall(MatImaginaryPart(a->A)); 2145 PetscCall(MatImaginaryPart(a->B)); 2146 PetscFunctionReturn(PETSC_SUCCESS); 2147 } 2148 2149 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2150 { 2151 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2152 PetscInt i, *idxb = NULL, m = A->rmap->n; 2153 PetscScalar *va, *vv; 2154 Vec vB, vA; 2155 const PetscScalar *vb; 2156 2157 PetscFunctionBegin; 2158 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2159 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2160 2161 PetscCall(VecGetArrayWrite(vA, &va)); 2162 if (idx) { 2163 for (i = 0; i < m; i++) { 2164 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2165 } 2166 } 2167 2168 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2169 PetscCall(PetscMalloc1(m, &idxb)); 2170 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2171 2172 PetscCall(VecGetArrayWrite(v, &vv)); 2173 PetscCall(VecGetArrayRead(vB, &vb)); 2174 for (i = 0; i < m; i++) { 2175 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2176 vv[i] = vb[i]; 2177 if (idx) idx[i] = a->garray[idxb[i]]; 2178 } else { 2179 vv[i] = va[i]; 2180 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2181 } 2182 } 2183 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2184 PetscCall(VecRestoreArrayWrite(vA, &va)); 2185 PetscCall(VecRestoreArrayRead(vB, &vb)); 2186 PetscCall(PetscFree(idxb)); 2187 PetscCall(VecDestroy(&vA)); 2188 PetscCall(VecDestroy(&vB)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 2192 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2193 { 2194 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2195 Vec vB, vA; 2196 2197 PetscFunctionBegin; 2198 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2199 PetscCall(MatGetRowSumAbs(a->A, vA)); 2200 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2201 PetscCall(MatGetRowSumAbs(a->B, vB)); 2202 PetscCall(VecAXPY(vA, 1.0, vB)); 2203 PetscCall(VecDestroy(&vB)); 2204 PetscCall(VecCopy(vA, v)); 2205 PetscCall(VecDestroy(&vA)); 2206 PetscFunctionReturn(PETSC_SUCCESS); 2207 } 2208 2209 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2210 { 2211 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2212 PetscInt m = A->rmap->n, n = A->cmap->n; 2213 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2214 PetscInt *cmap = mat->garray; 2215 PetscInt *diagIdx, *offdiagIdx; 2216 Vec diagV, offdiagV; 2217 PetscScalar *a, *diagA, *offdiagA; 2218 const PetscScalar *ba, *bav; 2219 PetscInt r, j, col, ncols, *bi, *bj; 2220 Mat B = mat->B; 2221 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2222 2223 PetscFunctionBegin; 2224 /* When a process holds entire A and other processes have no entry */ 2225 if (A->cmap->N == n) { 2226 PetscCall(VecGetArrayWrite(v, &diagA)); 2227 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2228 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2229 PetscCall(VecDestroy(&diagV)); 2230 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2231 PetscFunctionReturn(PETSC_SUCCESS); 2232 } else if (n == 0) { 2233 if (m) { 2234 PetscCall(VecGetArrayWrite(v, &a)); 2235 for (r = 0; r < m; r++) { 2236 a[r] = 0.0; 2237 if (idx) idx[r] = -1; 2238 } 2239 PetscCall(VecRestoreArrayWrite(v, &a)); 2240 } 2241 PetscFunctionReturn(PETSC_SUCCESS); 2242 } 2243 2244 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2245 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2246 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2247 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2248 2249 /* Get 
offdiagIdx[] for implicit 0.0 */ 2250 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2251 ba = bav; 2252 bi = b->i; 2253 bj = b->j; 2254 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2255 for (r = 0; r < m; r++) { 2256 ncols = bi[r + 1] - bi[r]; 2257 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2258 offdiagA[r] = *ba; 2259 offdiagIdx[r] = cmap[0]; 2260 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2261 offdiagA[r] = 0.0; 2262 2263 /* Find first hole in the cmap */ 2264 for (j = 0; j < ncols; j++) { 2265 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2266 if (col > j && j < cstart) { 2267 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2268 break; 2269 } else if (col > j + n && j >= cstart) { 2270 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2271 break; 2272 } 2273 } 2274 if (j == ncols && ncols < A->cmap->N - n) { 2275 /* a hole is outside compressed Bcols */ 2276 if (ncols == 0) { 2277 if (cstart) { 2278 offdiagIdx[r] = 0; 2279 } else offdiagIdx[r] = cend; 2280 } else { /* ncols > 0 */ 2281 offdiagIdx[r] = cmap[ncols - 1] + 1; 2282 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2283 } 2284 } 2285 } 2286 2287 for (j = 0; j < ncols; j++) { 2288 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2289 offdiagA[r] = *ba; 2290 offdiagIdx[r] = cmap[*bj]; 2291 } 2292 ba++; 2293 bj++; 2294 } 2295 } 2296 2297 PetscCall(VecGetArrayWrite(v, &a)); 2298 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2299 for (r = 0; r < m; ++r) { 2300 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 if (idx) idx[r] = cstart + diagIdx[r]; 2303 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2304 a[r] = diagA[r]; 2305 if (idx) { 2306 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2307 idx[r] = cstart + diagIdx[r]; 2308 } else idx[r] = offdiagIdx[r]; 2309 } 2310 } else { 2311 a[r] = offdiagA[r]; 2312 if (idx) 
idx[r] = offdiagIdx[r];
    }
  }
  /* release every borrowed array and the temporary work objects */
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMin_MPIAIJ - for every locally owned row, finds the entry with the smallest real part.

  Input Parameter:
. A - the MPIAIJ matrix

  Output Parameters:
+ v   - receives the per-row minimum
- idx - if not NULL, receives for each row the global column index of that minimum
        (-1 for every row when this rank owns no columns)

  The result is assembled from the diagonal block (mat->A) and the off-diagonal block (mat->B).
  Columns of B that are not stored count as an implicit 0.0. On a tie between the two blocks
  the smaller global column index is reported.
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt m = A->rmap->n, n = A->cmap->n;
  PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt *cmap = mat->garray;
  PetscInt *diagIdx, *offdiagIdx;
  Vec diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt r, j, col, ncols, *bi, *bj;
  Mat B = mat->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap v's storage in a sequential vector so the diagonal block writes straight into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: report the largest real value and "no column" */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* row minima of the diagonal block; diagIdx[] holds block-local column numbers */
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it contains an implicit 0.0: the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the locally owned column range */
        }
      }
    }

    /* scan the stored entries of this row; ba and bj advance through B's arrays across rows */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the two per-block results; a tie reports the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMax_MPIAIJ - for every locally owned row, finds the entry with the largest real part.

  Input Parameter:
. A - the MPIAIJ matrix

  Output Parameters:
+ v   - receives the per-row maximum
- idx - if not NULL, receives for each row the global column index of that maximum
        (-1 for every row when this rank owns no columns)
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt m = A->rmap->n, n = A->cmap->n;
  PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt *cmap = mat->garray;
  PetscInt *diagIdx, *offdiagIdx;
  Vec diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt r, j, col, ncols, *bi, *bj;
  Mat B = mat->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap v's storage in a sequential vector so the diagonal block writes straight into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: report PETSC_MIN_REAL and "no column" */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* row maxima of the diagonal block; diagIdx[] holds block-local column numbers */
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  /* per-row maximum over the off-diagonal block, counting unstored columns as 0.0 */
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the locally owned column range */
        }
      }
    }

    /* scan the stored entries of this row; ba and bj advance through B's arrays across rows */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the two per-block results; a tie reports the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2550 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2551 PetscCall(VecDestroy(&diagV)); 2552 PetscCall(VecDestroy(&offdiagV)); 2553 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2554 PetscFunctionReturn(PETSC_SUCCESS); 2555 } 2556 2557 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2558 { 2559 Mat *dummy; 2560 2561 PetscFunctionBegin; 2562 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2563 *newmat = *dummy; 2564 PetscCall(PetscFree(dummy)); 2565 PetscFunctionReturn(PETSC_SUCCESS); 2566 } 2567 2568 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2571 2572 PetscFunctionBegin; 2573 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2574 A->factorerrortype = a->A->factorerrortype; 2575 PetscFunctionReturn(PETSC_SUCCESS); 2576 } 2577 2578 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2579 { 2580 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2581 2582 PetscFunctionBegin; 2583 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2584 PetscCall(MatSetRandom(aij->A, rctx)); 2585 if (x->assembled) { 2586 PetscCall(MatSetRandom(aij->B, rctx)); 2587 } else { 2588 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2589 } 2590 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2591 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2592 PetscFunctionReturn(PETSC_SUCCESS); 2593 } 2594 2595 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2596 { 2597 PetscFunctionBegin; 2598 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2599 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2600 
PetscFunctionReturn(PETSC_SUCCESS); 2601 } 2602 2603 /*@ 2604 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2605 2606 Not Collective 2607 2608 Input Parameter: 2609 . A - the matrix 2610 2611 Output Parameter: 2612 . nz - the number of nonzeros 2613 2614 Level: advanced 2615 2616 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2617 @*/ 2618 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2619 { 2620 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2621 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2622 PetscBool isaij; 2623 2624 PetscFunctionBegin; 2625 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2626 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2627 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2628 PetscFunctionReturn(PETSC_SUCCESS); 2629 } 2630 2631 /*@ 2632 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2633 2634 Collective 2635 2636 Input Parameters: 2637 + A - the matrix 2638 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2639 2640 Level: advanced 2641 2642 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2643 @*/ 2644 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2645 { 2646 PetscFunctionBegin; 2647 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2648 PetscFunctionReturn(PETSC_SUCCESS); 2649 } 2650 2651 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2652 { 2653 PetscBool sc = PETSC_FALSE, flg; 2654 2655 PetscFunctionBegin; 2656 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2657 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2658 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use 
a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2659 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2660 PetscOptionsHeadEnd(); 2661 PetscFunctionReturn(PETSC_SUCCESS); 2662 } 2663 2664 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2665 { 2666 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2667 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2668 2669 PetscFunctionBegin; 2670 if (!Y->preallocated) { 2671 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2672 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2673 PetscInt nonew = aij->nonew; 2674 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2675 aij->nonew = nonew; 2676 } 2677 PetscCall(MatShift_Basic(Y, a)); 2678 PetscFunctionReturn(PETSC_SUCCESS); 2679 } 2680 2681 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2682 { 2683 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2684 2685 PetscFunctionBegin; 2686 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2687 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2688 if (d) { 2689 PetscInt rstart; 2690 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2691 *d += rstart; 2692 } 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2697 { 2698 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2699 2700 PetscFunctionBegin; 2701 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 
PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Operation table for MATMPIAIJ.

  The initializer is positional: each entry fills the next member of struct _MatOps, so
  the order must not change. A numbered marker gives the slot index of the entry that
  follows it. NULL marks a slot for which this table supplies no implementation.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};

/* Calls MatStoreValues() on the diagonal and the off-diagonal block */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Calls MatRetrieveValues() on the diagonal and the off-diagonal block */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMPIAIJSetPreallocation_MPIAIJ - (re)creates and preallocates the two local blocks of B.

  Input Parameters:
+ B     - the MPIAIJ matrix
. d_nz  - passed to MatSeqAIJSetPreallocation() for the diagonal block b->A
. d_nnz - passed to MatSeqAIJSetPreallocation() for the diagonal block b->A
. o_nz  - passed to MatSeqAIJSetPreallocation() for the off-diagonal block b->B
- o_nnz - passed to MatSeqAIJSetPreallocation() for the off-diagonal block b->B

  Any existing column map, garray, local work vector and scatter are destroyed, and both
  blocks are destroyed and created afresh as MATSEQAIJ. On return B is marked preallocated
  and not assembled.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash mode: reinstall the ops table kept in b->cops (presumably saved when hash setup was activated - confirm) */
    B->ops[0] = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* discard the communication data belonging to the previous nonzero structure */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* NOTE(review): the Get/Restore macro pair brackets the destroy/recreate of the block;
     presumably it carries options over from the old block - confirm against the macro definitions */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* the off-diagonal block gets all global columns, or none when running on a single rank */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatResetPreallocation_MPIAIJ - resets the preallocation of both local blocks.

  The column map, garray, local work vector and scatter are destroyed, MatResetPreallocation()
  is applied to b->A and b->B, and B is marked preallocated and not assembled.
*/
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDuplicate_MPIAIJ - creates a new matrix with the same type, sizes and layouts as matin.

  Input Parameters:
+ matin    - the matrix to duplicate
- cpvalues - forwarded to MatDuplicate() for both local blocks

  Output Parameter:
. newmat - the duplicate (set to NULL first, and to the new matrix only at the very end)

  If matin is still in hash mode the duplicate is only set up. Otherwise the column map,
  garray, local work vector, scatter and both blocks are duplicated.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  a->rowindices = NULL;
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the row and column layouts with the original instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      /* one extra slot is allocated; only len entries are copied */
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over the functions composed on the original object */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatLoad_MPIAIJ - loads newMat from a viewer, dispatching on the viewer type.

  Binary viewers are handled by MatLoad_MPIAIJ_Binary(). HDF5 viewers are handled by
  MatLoad_AIJ_HDF5() when PETSC_HAVE_HDF5 is defined. Any other viewer type, or an HDF5
  viewer in a build without HDF5, raises PETSC_ERR_SUP.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatLoad_MPIAIJ_Binary - reads a matrix from a binary viewer into mat.

  The data is read in this order: a 4-integer header (class id, global rows M, global
  columns N, nonzero count nz), the row lengths, the column indices, and the values.
  Global sizes of mat that are still unset are taken from the header, and sizes already
  set must match it. The rows are handed to MatMPIAIJSetPreallocationCSR().
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M = header[1];
  N = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix sum: turn the row lengths into row offsets, so rowidxs[m] is the local entry count */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* the consistency check is skipped when the header stores PETSC_MAX_INT as the nonzero count */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3095 PetscCall(PetscFree(rowidxs)); 3096 PetscCall(PetscFree2(colidxs, matvals)); 3097 PetscFunctionReturn(PETSC_SUCCESS); 3098 } 3099 3100 /* Not scalable because of ISAllGather() unless getting all columns. */ 3101 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3102 { 3103 IS iscol_local; 3104 PetscBool isstride; 3105 PetscMPIInt lisstride = 0, gisstride; 3106 3107 PetscFunctionBegin; 3108 /* check if we are grabbing all columns*/ 3109 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3110 3111 if (isstride) { 3112 PetscInt start, len, mstart, mlen; 3113 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3114 PetscCall(ISGetLocalSize(iscol, &len)); 3115 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3116 if (mstart == start && mlen - mstart == len) lisstride = 1; 3117 } 3118 3119 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3120 if (gisstride) { 3121 PetscInt N; 3122 PetscCall(MatGetSize(mat, NULL, &N)); 3123 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3124 PetscCall(ISSetIdentity(iscol_local)); 3125 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3126 } else { 3127 PetscInt cbs; 3128 PetscCall(ISGetBlockSize(iscol, &cbs)); 3129 PetscCall(ISAllGather(iscol, &iscol_local)); 3130 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3131 } 3132 3133 *isseq = iscol_local; 3134 PetscFunctionReturn(PETSC_SUCCESS); 3135 } 3136 3137 /* 3138 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3139 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3140 3141 Input Parameters: 3142 + mat - matrix 3143 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3144 i.e., mat->rstart <= isrow[i] < mat->rend 3145 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3146 i.e., mat->cstart <= iscol[i] < mat->cend 3147 3148 Output Parameters: 3149 + isrow_d - sequential row index set for retrieving mat->A 3150 . iscol_d - sequential column index set for retrieving mat->A 3151 . iscol_o - sequential column index set for retrieving mat->B 3152 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3153 */ 3154 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3155 { 3156 Vec x, cmap; 3157 const PetscInt *is_idx; 3158 PetscScalar *xarray, *cmaparray; 3159 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3161 Mat B = a->B; 3162 Vec lvec = a->lvec, lcmap; 3163 PetscInt i, cstart, cend, Bn = B->cmap->N; 3164 MPI_Comm comm; 3165 VecScatter Mvctx = a->Mvctx; 3166 3167 PetscFunctionBegin; 3168 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3169 PetscCall(ISGetLocalSize(iscol, &ncols)); 3170 3171 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3172 PetscCall(MatCreateVecs(mat, &x, NULL)); 3173 PetscCall(VecSet(x, -1.0)); 3174 PetscCall(VecDuplicate(x, &cmap)); 3175 PetscCall(VecSet(cmap, -1.0)); 3176 3177 /* Get start indices */ 3178 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3179 isstart -= ncols; 3180 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3181 3182 PetscCall(ISGetIndices(iscol, &is_idx)); 3183 PetscCall(VecGetArray(x, &xarray)); 3184 PetscCall(VecGetArray(cmap, &cmaparray)); 3185 PetscCall(PetscMalloc1(ncols, &idx)); 3186 for (i = 0; i < ncols; i++) { 3187 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3188 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3189 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3190 } 3191 PetscCall(VecRestoreArray(x, &xarray)); 3192 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3193 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3194 3195 /* Get iscol_d */ 3196 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3197 PetscCall(ISGetBlockSize(iscol, &i)); 3198 PetscCall(ISSetBlockSize(*iscol_d, i)); 3199 3200 /* Get isrow_d */ 3201 PetscCall(ISGetLocalSize(isrow, &m)); 3202 rstart = mat->rmap->rstart; 3203 PetscCall(PetscMalloc1(m, &idx)); 3204 PetscCall(ISGetIndices(isrow, &is_idx)); 3205 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3206 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3207 3208 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3209 PetscCall(ISGetBlockSize(isrow, &i)); 3210 PetscCall(ISSetBlockSize(*isrow_d, i)); 3211 3212 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3213 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3214 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3215 3216 PetscCall(VecDuplicate(lvec, &lcmap)); 3217 3218 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  /* entries still at the -1 padding were not selected by iscol on their owning rank */
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i; /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat

  With MAT_REUSE_MATRIX the index sets composed on *submat under "isrow_d", "iscol_d" and
  "iscol_o" are queried and used to refresh its two local blocks. Otherwise the index sets
  are built, the local submatrices are extracted and combined into a new parallel matrix,
  and the index sets are composed on it for later reuse.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat M = NULL;
  MPI_Comm comm;
  IS iscol_d, isrow_d, iscol_o;
  Mat Asub = NULL, Bsub = NULL;
  PetscInt n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* the off-diagonal block is only refreshed when it selects at least one column */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX */
    const PetscInt *garray;
    PetscInt BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      /* keep only the iscol_o entries whose garray value also appears in the submatrix's garray */
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        /* NOTE(review): j is not re-checked against BsubN inside this loop; presumably safe
           because subgarray is expected to be a subset of garray - confirm */
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* each index set is composed on M and then the local reference is released */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS iscol_local = NULL, isrow_d;
  PetscInt csize;
  PetscInt n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1]
= PETSC_TRUE; 3384 } else { 3385 PetscCall(ISGetMinMax(iscol, &i, &j)); 3386 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3387 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3388 } 3389 3390 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3391 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3392 sameRowDist = tsameDist[0]; 3393 } 3394 3395 if (sameRowDist) { 3396 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3397 /* isrow and iscol have same processor distribution as mat */ 3398 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3399 PetscFunctionReturn(PETSC_SUCCESS); 3400 } else { /* sameRowDist */ 3401 /* isrow has same processor distribution as mat */ 3402 if (call == MAT_INITIAL_MATRIX) { 3403 PetscBool sorted; 3404 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3405 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3406 PetscCall(ISGetSize(iscol, &i)); 3407 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3408 3409 PetscCall(ISSorted(iscol_local, &sorted)); 3410 if (sorted) { 3411 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3412 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3413 PetscFunctionReturn(PETSC_SUCCESS); 3414 } 3415 } else { /* call == MAT_REUSE_MATRIX */ 3416 IS iscol_sub; 3417 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3418 if (iscol_sub) { 3419 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3420 PetscFunctionReturn(PETSC_SUCCESS); 3421 } 3422 } 3423 } 3424 } 3425 3426 /* General case: iscol -> iscol_local which has global size of iscol */ 3427 if (call == MAT_REUSE_MATRIX) { 3428 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3429 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3430 } else { 3431 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3432 } 3433 3434 PetscCall(ISGetLocalSize(iscol, &csize)); 3435 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3436 3437 if (call == MAT_INITIAL_MATRIX) { 3438 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3439 PetscCall(ISDestroy(&iscol_local)); 3440 } 3441 PetscFunctionReturn(PETSC_SUCCESS); 3442 } 3443 3444 /*@C 3445 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3446 and "off-diagonal" part of the matrix in CSR format. 3447 3448 Collective 3449 3450 Input Parameters: 3451 + comm - MPI communicator 3452 . A - "diagonal" portion of matrix 3453 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3454 - garray - global index of `B` columns 3455 3456 Output Parameter: 3457 . mat - the matrix, with input `A` as its local diagonal matrix 3458 3459 Level: advanced 3460 3461 Notes: 3462 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3463 3464 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 
3465 3466 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3467 @*/ 3468 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3469 { 3470 Mat_MPIAIJ *maij; 3471 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3472 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3473 const PetscScalar *oa; 3474 Mat Bnew; 3475 PetscInt m, n, N; 3476 MatType mpi_mat_type; 3477 3478 PetscFunctionBegin; 3479 PetscCall(MatCreate(comm, mat)); 3480 PetscCall(MatGetSize(A, &m, &n)); 3481 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3482 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3483 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3484 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3485 3486 /* Get global columns of mat */ 3487 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3488 3489 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3490 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3491 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3492 PetscCall(MatSetType(*mat, mpi_mat_type)); 3493 3494 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3495 maij = (Mat_MPIAIJ *)(*mat)->data; 3496 3497 (*mat)->preallocated = PETSC_TRUE; 3498 3499 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3500 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3501 3502 /* Set A as diagonal portion of *mat */ 3503 maij->A = A; 3504 3505 nz = oi[m]; 3506 for (i = 0; i < nz; i++) { 3507 col = oj[i]; 3508 oj[i] = garray[col]; 3509 } 3510 3511 /* Set Bnew as off-diagonal portion of *mat */ 3512 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3513 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3514 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3515 bnew = (Mat_SeqAIJ *)Bnew->data; 3516 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3517 maij->B = Bnew; 3518 3519 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3520 3521 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3522 b->free_a = PETSC_FALSE; 3523 b->free_ij = PETSC_FALSE; 3524 PetscCall(MatDestroy(&B)); 3525 3526 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3527 bnew->free_a = PETSC_TRUE; 3528 bnew->free_ij = PETSC_TRUE; 3529 3530 /* condense columns of maij->B */ 3531 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3532 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3533 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3534 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3535 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3536 PetscFunctionReturn(PETSC_SUCCESS); 3537 } 3538 3539 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, 
PetscBool, Mat *); 3540 3541 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3542 { 3543 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3544 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3545 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3546 Mat M, Msub, B = a->B; 3547 MatScalar *aa; 3548 Mat_SeqAIJ *aij; 3549 PetscInt *garray = a->garray, *colsub, Ncols; 3550 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3551 IS iscol_sub, iscmap; 3552 const PetscInt *is_idx, *cmap; 3553 PetscBool allcolumns = PETSC_FALSE; 3554 MPI_Comm comm; 3555 3556 PetscFunctionBegin; 3557 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3558 if (call == MAT_REUSE_MATRIX) { 3559 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3560 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3561 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3562 3563 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3564 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3565 3566 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3567 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3568 3569 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3570 3571 } else { /* call == MAT_INITIAL_MATRIX) */ 3572 PetscBool flg; 3573 3574 PetscCall(ISGetLocalSize(iscol, &n)); 3575 PetscCall(ISGetSize(iscol, &Ncols)); 3576 3577 /* (1) iscol -> nonscalable iscol_local */ 3578 /* Check for special case: each processor gets entire matrix columns */ 3579 PetscCall(ISIdentity(iscol_local, &flg)); 3580 if (flg 
&& n == mat->cmap->N) allcolumns = PETSC_TRUE; 3581 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3582 if (allcolumns) { 3583 iscol_sub = iscol_local; 3584 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3585 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3586 3587 } else { 3588 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3589 PetscInt *idx, *cmap1, k; 3590 PetscCall(PetscMalloc1(Ncols, &idx)); 3591 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3592 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3593 count = 0; 3594 k = 0; 3595 for (i = 0; i < Ncols; i++) { 3596 j = is_idx[i]; 3597 if (j >= cstart && j < cend) { 3598 /* diagonal part of mat */ 3599 idx[count] = j; 3600 cmap1[count++] = i; /* column index in submat */ 3601 } else if (Bn) { 3602 /* off-diagonal part of mat */ 3603 if (j == garray[k]) { 3604 idx[count] = j; 3605 cmap1[count++] = i; /* column index in submat */ 3606 } else if (j > garray[k]) { 3607 while (j > garray[k] && k < Bn - 1) k++; 3608 if (j == garray[k]) { 3609 idx[count] = j; 3610 cmap1[count++] = i; /* column index in submat */ 3611 } 3612 } 3613 } 3614 } 3615 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3616 3617 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3618 PetscCall(ISGetBlockSize(iscol, &cbs)); 3619 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3620 3621 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3622 } 3623 3624 /* (3) Create sequential Msub */ 3625 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3626 } 3627 3628 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3629 aij = (Mat_SeqAIJ *)(Msub)->data; 3630 ii = aij->i; 3631 PetscCall(ISGetIndices(iscmap, &cmap)); 3632 3633 /* 3634 
m - number of local rows 3635 Ncols - number of columns (same on all processors) 3636 rstart - first row in new global matrix generated 3637 */ 3638 PetscCall(MatGetSize(Msub, &m, NULL)); 3639 3640 if (call == MAT_INITIAL_MATRIX) { 3641 /* (4) Create parallel newmat */ 3642 PetscMPIInt rank, size; 3643 PetscInt csize; 3644 3645 PetscCallMPI(MPI_Comm_size(comm, &size)); 3646 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3647 3648 /* 3649 Determine the number of non-zeros in the diagonal and off-diagonal 3650 portions of the matrix in order to do correct preallocation 3651 */ 3652 3653 /* first get start and end of "diagonal" columns */ 3654 PetscCall(ISGetLocalSize(iscol, &csize)); 3655 if (csize == PETSC_DECIDE) { 3656 PetscCall(ISGetSize(isrow, &mglobal)); 3657 if (mglobal == Ncols) { /* square matrix */ 3658 nlocal = m; 3659 } else { 3660 nlocal = Ncols / size + ((Ncols % size) > rank); 3661 } 3662 } else { 3663 nlocal = csize; 3664 } 3665 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3666 rstart = rend - nlocal; 3667 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3668 3669 /* next, compute all the lengths */ 3670 jj = aij->j; 3671 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3672 olens = dlens + m; 3673 for (i = 0; i < m; i++) { 3674 jend = ii[i + 1] - ii[i]; 3675 olen = 0; 3676 dlen = 0; 3677 for (j = 0; j < jend; j++) { 3678 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3679 else dlen++; 3680 jj++; 3681 } 3682 olens[i] = olen; 3683 dlens[i] = dlen; 3684 } 3685 3686 PetscCall(ISGetBlockSize(isrow, &bs)); 3687 PetscCall(ISGetBlockSize(iscol, &cbs)); 3688 3689 PetscCall(MatCreate(comm, &M)); 3690 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3691 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3692 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3693 
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3694 PetscCall(PetscFree(dlens)); 3695 3696 } else { /* call == MAT_REUSE_MATRIX */ 3697 M = *newmat; 3698 PetscCall(MatGetLocalSize(M, &i, NULL)); 3699 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3700 PetscCall(MatZeroEntries(M)); 3701 /* 3702 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3703 rather than the slower MatSetValues(). 3704 */ 3705 M->was_assembled = PETSC_TRUE; 3706 M->assembled = PETSC_FALSE; 3707 } 3708 3709 /* (5) Set values of Msub to *newmat */ 3710 PetscCall(PetscMalloc1(count, &colsub)); 3711 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3712 3713 jj = aij->j; 3714 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3715 for (i = 0; i < m; i++) { 3716 row = rstart + i; 3717 nz = ii[i + 1] - ii[i]; 3718 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3719 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3720 jj += nz; 3721 aa += nz; 3722 } 3723 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3724 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3725 3726 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3727 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3728 3729 PetscCall(PetscFree(colsub)); 3730 3731 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3732 if (call == MAT_INITIAL_MATRIX) { 3733 *newmat = M; 3734 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3735 PetscCall(MatDestroy(&Msub)); 3736 3737 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3738 PetscCall(ISDestroy(&iscol_sub)); 3739 3740 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3741 PetscCall(ISDestroy(&iscmap)); 3742 3743 if (iscol_local) { 3744 PetscCall(PetscObjectCompose((PetscObject)*newmat, 
"ISAllGather", (PetscObject)iscol_local)); 3745 PetscCall(ISDestroy(&iscol_local)); 3746 } 3747 } 3748 PetscFunctionReturn(PETSC_SUCCESS); 3749 } 3750 3751 /* 3752 Not great since it makes two copies of the submatrix, first an SeqAIJ 3753 in local and then by concatenating the local matrices the end result. 3754 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3755 3756 This requires a sequential iscol with all indices. 3757 */ 3758 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3759 { 3760 PetscMPIInt rank, size; 3761 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3762 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3763 Mat M, Mreuse; 3764 MatScalar *aa, *vwork; 3765 MPI_Comm comm; 3766 Mat_SeqAIJ *aij; 3767 PetscBool colflag, allcolumns = PETSC_FALSE; 3768 3769 PetscFunctionBegin; 3770 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3771 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3772 PetscCallMPI(MPI_Comm_size(comm, &size)); 3773 3774 /* Check for special case: each processor gets entire matrix columns */ 3775 PetscCall(ISIdentity(iscol, &colflag)); 3776 PetscCall(ISGetLocalSize(iscol, &n)); 3777 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3778 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3779 3780 if (call == MAT_REUSE_MATRIX) { 3781 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3782 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3783 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3784 } else { 3785 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3786 } 3787 3788 /* 3789 m - number of 
local rows 3790 n - number of columns (same on all processors) 3791 rstart - first row in new global matrix generated 3792 */ 3793 PetscCall(MatGetSize(Mreuse, &m, &n)); 3794 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3795 if (call == MAT_INITIAL_MATRIX) { 3796 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3797 ii = aij->i; 3798 jj = aij->j; 3799 3800 /* 3801 Determine the number of non-zeros in the diagonal and off-diagonal 3802 portions of the matrix in order to do correct preallocation 3803 */ 3804 3805 /* first get start and end of "diagonal" columns */ 3806 if (csize == PETSC_DECIDE) { 3807 PetscCall(ISGetSize(isrow, &mglobal)); 3808 if (mglobal == n) { /* square matrix */ 3809 nlocal = m; 3810 } else { 3811 nlocal = n / size + ((n % size) > rank); 3812 } 3813 } else { 3814 nlocal = csize; 3815 } 3816 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3817 rstart = rend - nlocal; 3818 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3819 3820 /* next, compute all the lengths */ 3821 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3822 olens = dlens + m; 3823 for (i = 0; i < m; i++) { 3824 jend = ii[i + 1] - ii[i]; 3825 olen = 0; 3826 dlen = 0; 3827 for (j = 0; j < jend; j++) { 3828 if (*jj < rstart || *jj >= rend) olen++; 3829 else dlen++; 3830 jj++; 3831 } 3832 olens[i] = olen; 3833 dlens[i] = dlen; 3834 } 3835 PetscCall(MatCreate(comm, &M)); 3836 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3837 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3838 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3839 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3840 PetscCall(PetscFree(dlens)); 3841 } else { 3842 PetscInt ml, nl; 3843 3844 M = *newmat; 3845 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3846 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3847 PetscCall(MatZeroEntries(M)); 3848 /* 3849 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3850 rather than the slower MatSetValues(). 3851 */ 3852 M->was_assembled = PETSC_TRUE; 3853 M->assembled = PETSC_FALSE; 3854 } 3855 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3856 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3857 ii = aij->i; 3858 jj = aij->j; 3859 3860 /* trigger copy to CPU if needed */ 3861 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3862 for (i = 0; i < m; i++) { 3863 row = rstart + i; 3864 nz = ii[i + 1] - ii[i]; 3865 cwork = jj; 3866 jj = PetscSafePointerPlusOffset(jj, nz); 3867 vwork = aa; 3868 aa = PetscSafePointerPlusOffset(aa, nz); 3869 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3870 } 3871 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3872 3873 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3874 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3875 *newmat = M; 3876 3877 /* save submatrix used in processor for next request */ 3878 if (call == MAT_INITIAL_MATRIX) { 3879 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3880 PetscCall(MatDestroy(&Mreuse)); 3881 } 3882 PetscFunctionReturn(PETSC_SUCCESS); 3883 } 3884 3885 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3886 { 3887 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3888 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3889 const PetscInt *JJ; 3890 PetscBool nooffprocentries; 3891 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3892 3893 PetscFunctionBegin; 3894 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3895 3896 PetscCall(PetscLayoutSetUp(B->rmap)); 3897 PetscCall(PetscLayoutSetUp(B->cmap)); 3898 m = B->rmap->n; 3899 cstart = B->cmap->rstart; 3900 cend = B->cmap->rend; 3901 
rstart = B->rmap->rstart; 3902 3903 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3904 3905 if (PetscDefined(USE_DEBUG)) { 3906 for (i = 0; i < m; i++) { 3907 nnz = Ii[i + 1] - Ii[i]; 3908 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3909 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3910 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3911 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3912 } 3913 } 3914 3915 for (i = 0; i < m; i++) { 3916 nnz = Ii[i + 1] - Ii[i]; 3917 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3918 nnz_max = PetscMax(nnz_max, nnz); 3919 d = 0; 3920 for (j = 0; j < nnz; j++) { 3921 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3922 } 3923 d_nnz[i] = d; 3924 o_nnz[i] = nnz - d; 3925 } 3926 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3927 PetscCall(PetscFree2(d_nnz, o_nnz)); 3928 3929 for (i = 0; i < m; i++) { 3930 ii = i + rstart; 3931 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES)); 3932 } 3933 nooffprocentries = B->nooffprocentries; 3934 B->nooffprocentries = PETSC_TRUE; 3935 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3936 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3937 B->nooffprocentries = nooffprocentries; 3938 3939 /* count number of entries below block diagonal */ 3940 PetscCall(PetscFree(Aij->ld)); 3941 PetscCall(PetscCalloc1(m, &ld)); 3942 Aij->ld = ld; 3943 for (i = 0; i < m; i++) { 3944 nnz = Ii[i + 1] - Ii[i]; 3945 j = 0; 3946 while (j < nnz && J[j] < cstart) j++; 3947 ld[i] = j; 3948 if (J) J += nnz; 3949 } 
3950 3951 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3952 PetscFunctionReturn(PETSC_SUCCESS); 3953 } 3954 3955 /*@ 3956 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3957 (the default parallel PETSc format). 3958 3959 Collective 3960 3961 Input Parameters: 3962 + B - the matrix 3963 . i - the indices into `j` for the start of each local row (indices start with zero) 3964 . j - the column indices for each local row (indices start with zero) 3965 - v - optional values in the matrix 3966 3967 Level: developer 3968 3969 Notes: 3970 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3971 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3972 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3973 3974 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3975 3976 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3977 3978 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3979 3980 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3981 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3982 3983 The format which is used for the sparse matrix input, is equivalent to a 3984 row-major ordering.. 
i.e for the following matrix, the input data expected is 3985 as shown 3986 .vb 3987 1 0 0 3988 2 0 3 P0 3989 ------- 3990 4 5 6 P1 3991 3992 Process0 [P0] rows_owned=[0,1] 3993 i = {0,1,3} [size = nrow+1 = 2+1] 3994 j = {0,0,2} [size = 3] 3995 v = {1,2,3} [size = 3] 3996 3997 Process1 [P1] rows_owned=[2] 3998 i = {0,3} [size = nrow+1 = 1+1] 3999 j = {0,1,2} [size = 3] 4000 v = {4,5,6} [size = 3] 4001 .ve 4002 4003 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4004 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4005 @*/ 4006 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4007 { 4008 PetscFunctionBegin; 4009 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4010 PetscFunctionReturn(PETSC_SUCCESS); 4011 } 4012 4013 /*@C 4014 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4015 (the default parallel PETSc format). For good matrix assembly performance 4016 the user should preallocate the matrix storage by setting the parameters 4017 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4018 4019 Collective 4020 4021 Input Parameters: 4022 + B - the matrix 4023 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4024 (same value is used for all local rows) 4025 . d_nnz - array containing the number of nonzeros in the various rows of the 4026 DIAGONAL portion of the local submatrix (possibly different for each row) 4027 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4028 The size of this array is equal to the number of local rows, i.e 'm'. 
For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices.
For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4118 4119 The DIAGONAL portion of the local submatrix of a processor can be defined 4120 as the submatrix which is obtained by extraction the part corresponding to 4121 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4122 first row that belongs to the processor, r2 is the last row belonging to 4123 the this processor, and c1-c2 is range of indices of the local part of a 4124 vector suitable for applying the matrix to. This is an mxn matrix. In the 4125 common case of a square matrix, the row and column ranges are the same and 4126 the DIAGONAL part is also square. The remaining portion of the local 4127 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4128 4129 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4130 4131 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4132 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4133 You can also run with the option `-info` and look for messages with the string 4134 malloc in them to see if additional memory allocation was needed. 4135 4136 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4137 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4138 @*/ 4139 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4140 { 4141 PetscFunctionBegin; 4142 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4143 PetscValidType(B, 1); 4144 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4145 PetscFunctionReturn(PETSC_SUCCESS); 4146 } 4147 4148 /*@ 4149 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4150 CSR format for the local rows. 

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have it
         calculated if `N` is given) For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - global column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`

  If you do **not** use `MatUpdateMPIAIJWithArray()` or `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not
  need to be sorted. If you will use either of them, the column indices **must** be sorted.
4182 4183 The format which is used for the sparse matrix input, is equivalent to a 4184 row-major ordering, i.e., for the following matrix, the input data expected is 4185 as shown 4186 .vb 4187 1 0 0 4188 2 0 3 P0 4189 ------- 4190 4 5 6 P1 4191 4192 Process0 [P0] rows_owned=[0,1] 4193 i = {0,1,3} [size = nrow+1 = 2+1] 4194 j = {0,0,2} [size = 3] 4195 v = {1,2,3} [size = 3] 4196 4197 Process1 [P1] rows_owned=[2] 4198 i = {0,3} [size = nrow+1 = 1+1] 4199 j = {0,1,2} [size = 3] 4200 v = {4,5,6} [size = 3] 4201 .ve 4202 4203 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4204 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4205 @*/ 4206 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4207 { 4208 PetscFunctionBegin; 4209 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4210 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4211 PetscCall(MatCreate(comm, mat)); 4212 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4213 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4214 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4215 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4216 PetscFunctionReturn(PETSC_SUCCESS); 4217 } 4218 4219 /*@ 4220 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4221 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4222 from `MatCreateMPIAIJWithArrays()` 4223 4224 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4225 4226 Collective 4227 4228 Input Parameters: 4229 + mat - the matrix 4230 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4231 . n - This value should be the same as the local size used in creating the 4232 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4233 calculated if N is given) For square matrices n is almost always m. 4234 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4235 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4236 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4237 . J - column indices 4238 - v - matrix values 4239 4240 Level: deprecated 4241 4242 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4243 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4244 @*/ 4245 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4246 { 4247 PetscInt nnz, i; 4248 PetscBool nooffprocentries; 4249 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4250 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4251 PetscScalar *ad, *ao; 4252 PetscInt ldi, Iii, md; 4253 const PetscInt *Adi = Ad->i; 4254 PetscInt *ld = Aij->ld; 4255 4256 PetscFunctionBegin; 4257 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4258 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4259 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4260 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4261 4262 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4263 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4264 4265 for (i = 0; i < m; i++) { 4266 if (PetscDefined(USE_DEBUG)) { 4267 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4268 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4269 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4270 } 4271 } 4272 nnz = Ii[i + 1] - Ii[i]; 4273 Iii = Ii[i]; 4274 ldi = ld[i]; 4275 md = Adi[i + 1] - Adi[i]; 4276 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4277 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4278 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4279 ad += md; 4280 ao += nnz - md; 4281 } 4282 nooffprocentries = mat->nooffprocentries; 4283 mat->nooffprocentries = PETSC_TRUE; 4284 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4285 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4286 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4287 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4288 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4289 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4290 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4291 mat->nooffprocentries = nooffprocentries; 4292 PetscFunctionReturn(PETSC_SUCCESS); 4293 } 4294 4295 /*@ 4296 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4297 4298 Collective 4299 4300 Input Parameters: 4301 + mat - the matrix 4302 - v - matrix values, stored by row 4303 4304 Level: intermediate 4305 4306 Notes: 4307 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4308 4309 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4310 4311 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4312 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4313 @*/ 4314 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4315 { 4316 PetscInt nnz, i, m; 4317 PetscBool nooffprocentries; 4318 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4319 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4320 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4321 PetscScalar *ad, *ao; 4322 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4323 PetscInt ldi, Iii, md; 4324 PetscInt *ld = Aij->ld; 4325 4326 PetscFunctionBegin; 4327 m = mat->rmap->n; 4328 4329 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4330 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4331 Iii = 0; 4332 for (i = 0; i < m; i++) { 4333 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4334 ldi = ld[i]; 4335 md = Adi[i + 1] - Adi[i]; 4336 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4337 ad += md; 4338 if (ao) { 4339 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4340 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4341 ao += nnz - md; 4342 } 4343 Iii += nnz; 4344 } 4345 nooffprocentries = mat->nooffprocentries; 4346 mat->nooffprocentries = PETSC_TRUE; 4347 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4348 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4349 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4350 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4351 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4352 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4353 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4354 mat->nooffprocentries = nooffprocentries; 4355 PetscFunctionReturn(PETSC_SUCCESS); 4356 } 4357 4358 /*@C 4359 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4360 (the default parallel PETSc format). For good matrix assembly performance 4361 the user should preallocate the matrix storage by setting the parameters 4362 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4363 4364 Collective 4365 4366 Input Parameters: 4367 + comm - MPI communicator 4368 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4369 This value should be the same as the local size used in creating the 4370 y vector for the matrix-vector product y = Ax. 4371 . n - This value should be the same as the local size used in creating the 4372 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4373 calculated if N is given) For square matrices n is almost always m. 4374 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4375 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4376 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4377 (same value is used for all local rows) 4378 . d_nnz - array containing the number of nonzeros in the various rows of the 4379 DIAGONAL portion of the local submatrix (possibly different for each row) 4380 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4381 The size of this array is equal to the number of local rows, i.e 'm'. 4382 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4383 submatrix (same value is used for all local rows). 
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode                     - Do not use inodes
. -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
                                      See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
                                      Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).
4419 4420 The parallel matrix is partitioned across processors such that the 4421 first m0 rows belong to process 0, the next m1 rows belong to 4422 process 1, the next m2 rows belong to process 2 etc.. where 4423 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4424 values corresponding to [m x N] submatrix. 4425 4426 The columns are logically partitioned with the n0 columns belonging 4427 to 0th partition, the next n1 columns belonging to the next 4428 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4429 4430 The DIAGONAL portion of the local submatrix on any given processor 4431 is the submatrix corresponding to the rows and columns m,n 4432 corresponding to the given processor. i.e diagonal matrix on 4433 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4434 etc. The remaining portion of the local submatrix [m x (N-n)] 4435 constitute the OFF-DIAGONAL portion. The example below better 4436 illustrates this concept. 4437 4438 For a square global matrix we define each processor's diagonal portion 4439 to be its local rows and the corresponding columns (a square submatrix); 4440 each processor's off-diagonal portion encompasses the remainder of the 4441 local matrix (a rectangular submatrix). 4442 4443 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4444 4445 When calling this routine with a single process communicator, a matrix of 4446 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4447 type of communicator, use the construction mechanism 4448 .vb 4449 MatCreate(..., &A); 4450 MatSetType(A, MATMPIAIJ); 4451 MatSetSizes(A, m, n, M, N); 4452 MatMPIAIJSetPreallocation(A, ...); 4453 .ve 4454 4455 By default, this format uses inodes (identical nodes) when possible. 4456 We search for consecutive rows with the same nonzero structure, thereby 4457 reusing matrix information to achieve increased efficiency. 

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4505 In this case, the values of `d_nz`,`o_nz` are 4506 .vb 4507 proc0 dnz = 2, o_nz = 2 4508 proc1 dnz = 3, o_nz = 2 4509 proc2 dnz = 1, o_nz = 4 4510 .ve 4511 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4512 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4513 for proc3. i.e we are using 12+15+10=37 storage locations to store 4514 34 values. 4515 4516 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4517 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4518 In the above case the values for d_nnz,o_nnz are 4519 .vb 4520 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4521 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4522 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4523 .ve 4524 Here the space allocated is sum of all the above values i.e 34, and 4525 hence pre-allocation is perfect. 4526 4527 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4528 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4529 @*/ 4530 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4531 { 4532 PetscMPIInt size; 4533 4534 PetscFunctionBegin; 4535 PetscCall(MatCreate(comm, A)); 4536 PetscCall(MatSetSizes(*A, m, n, M, N)); 4537 PetscCallMPI(MPI_Comm_size(comm, &size)); 4538 if (size > 1) { 4539 PetscCall(MatSetType(*A, MATMPIAIJ)); 4540 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4541 } else { 4542 PetscCall(MatSetType(*A, MATSEQAIJ)); 4543 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4544 } 4545 PetscFunctionReturn(PETSC_SUCCESS); 4546 } 4547 4548 /*MC 4549 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4550 4551 Synopsis: 4552 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, 
pointer :: colmap(:)},integer ierr) 4553 4554 Not Collective 4555 4556 Input Parameter: 4557 . A - the `MATMPIAIJ` matrix 4558 4559 Output Parameters: 4560 + Ad - the diagonal portion of the matrix 4561 . Ao - the off-diagonal portion of the matrix 4562 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4563 - ierr - error code 4564 4565 Level: advanced 4566 4567 Note: 4568 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4569 4570 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4571 M*/ 4572 4573 /*MC 4574 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4575 4576 Synopsis: 4577 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4578 4579 Not Collective 4580 4581 Input Parameters: 4582 + A - the `MATMPIAIJ` matrix 4583 . Ad - the diagonal portion of the matrix 4584 . Ao - the off-diagonal portion of the matrix 4585 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4586 - ierr - error code 4587 4588 Level: advanced 4589 4590 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4591 M*/ 4592 4593 /*@C 4594 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4595 4596 Not Collective 4597 4598 Input Parameter: 4599 . A - The `MATMPIAIJ` matrix 4600 4601 Output Parameters: 4602 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4603 . 
Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.

  Fortran Notes:
  `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  PetscBool   is_mpiaij;
  Mat_MPIAIJ *mpiaij;

  PetscFunctionBegin;
  /* Accept any type whose name begins with MATMPIAIJ */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &is_mpiaij));
  PetscCheck(is_mpiaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");

  /* Each output is optional; the matrix's own objects are handed back, no copies are made here */
  mpiaij = (Mat_MPIAIJ *)A->data;
  if (Ad) *Ad = mpiaij->A;
  if (Ao) *Ao = mpiaij->B;
  if (colmap) *colmap = mpiaij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Stacks the rows of the sequential matrix held by each process into one parallel matrix.

  With scall == MAT_INITIAL_MATRIX the output matrix is first created and preallocated (symbolic phase);
  in every case the rows of inmat are then inserted into *outmat and it is assembled (numeric phase).
  n is the local column size of the result and may be PETSC_DECIDE; it is only read in the symbolic phase.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, row, rstart, ncols, grow;
  PetscInt    *cols;
  PetscScalar *vals;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, ncols_total, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* The local column sizes must add up to the number of columns of inmat */
    PetscCall(MPIU_Allreduce(&n, &ncols_total, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(ncols_total == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, ncols_total, N);

    /* Inclusive prefix sum of the row counts, minus our own count, gives our first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* Count, per row, the entries belonging to the diagonal (dnz) and off-diagonal (onz) parts */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (row = 0; row < m; row++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, row, &ncols, &cols, NULL));
      PetscCall(MatPreallocateSet(row + rstart, ncols, cols, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, row, &ncols, &cols, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* Both preallocation routines are called, as in the original code */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy every local row of inmat into its global row of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (row = 0; row < m; row++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, row, &ncols, &cols, &vals));
    grow = row + rstart;
    PetscCall(MatSetValues(*outmat, 1, &grow, ncols, cols, vals, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, row, &ncols, &cols, &vals));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Releases a Mat_Merge_SeqsToMPI structure; a NULL argument is accepted and ignored */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge)
PetscFunctionReturn(PETSC_SUCCESS); 4691 PetscCall(PetscFree(merge->id_r)); 4692 PetscCall(PetscFree(merge->len_s)); 4693 PetscCall(PetscFree(merge->len_r)); 4694 PetscCall(PetscFree(merge->bi)); 4695 PetscCall(PetscFree(merge->bj)); 4696 PetscCall(PetscFree(merge->buf_ri[0])); 4697 PetscCall(PetscFree(merge->buf_ri)); 4698 PetscCall(PetscFree(merge->buf_rj[0])); 4699 PetscCall(PetscFree(merge->buf_rj)); 4700 PetscCall(PetscFree(merge->coi)); 4701 PetscCall(PetscFree(merge->coj)); 4702 PetscCall(PetscFree(merge->owners_co)); 4703 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4704 PetscCall(PetscFree(merge)); 4705 PetscFunctionReturn(PETSC_SUCCESS); 4706 } 4707 4708 #include <../src/mat/utils/freespace.h> 4709 #include <petscbt.h> 4710 4711 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4712 { 4713 MPI_Comm comm; 4714 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4715 PetscMPIInt size, rank, taga, *len_s; 4716 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4717 PetscInt proc, m; 4718 PetscInt **buf_ri, **buf_rj; 4719 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4720 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4721 MPI_Request *s_waits, *r_waits; 4722 MPI_Status *status; 4723 const MatScalar *aa, *a_a; 4724 MatScalar **abuf_r, *ba_i; 4725 Mat_Merge_SeqsToMPI *merge; 4726 PetscContainer container; 4727 4728 PetscFunctionBegin; 4729 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4730 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4731 4732 PetscCallMPI(MPI_Comm_size(comm, &size)); 4733 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4734 4735 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4736 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4737 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4738 PetscCall(MatSeqAIJGetArrayRead(seqmat, 
&a_a)); 4739 aa = a_a; 4740 4741 bi = merge->bi; 4742 bj = merge->bj; 4743 buf_ri = merge->buf_ri; 4744 buf_rj = merge->buf_rj; 4745 4746 PetscCall(PetscMalloc1(size, &status)); 4747 owners = merge->rowmap->range; 4748 len_s = merge->len_s; 4749 4750 /* send and recv matrix values */ 4751 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4752 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4753 4754 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4755 for (proc = 0, k = 0; proc < size; proc++) { 4756 if (!len_s[proc]) continue; 4757 i = owners[proc]; 4758 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4759 k++; 4760 } 4761 4762 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4763 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4764 PetscCall(PetscFree(status)); 4765 4766 PetscCall(PetscFree(s_waits)); 4767 PetscCall(PetscFree(r_waits)); 4768 4769 /* insert mat values of mpimat */ 4770 PetscCall(PetscMalloc1(N, &ba_i)); 4771 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4772 4773 for (k = 0; k < merge->nrecv; k++) { 4774 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4775 nrows = *buf_ri_k[k]; 4776 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4777 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4778 } 4779 4780 /* set values of ba */ 4781 m = merge->rowmap->n; 4782 for (i = 0; i < m; i++) { 4783 arow = owners[rank] + i; 4784 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4785 bnzi = bi[i + 1] - bi[i]; 4786 PetscCall(PetscArrayzero(ba_i, bnzi)); 4787 4788 /* add local non-zero vals of this proc's seqmat into ba */ 4789 anzi = ai[arow + 1] - ai[arow]; 4790 aj = a->j + ai[arow]; 4791 aa = a_a + ai[arow]; 4792 nextaj = 
0; 4793 for (j = 0; nextaj < anzi; j++) { 4794 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4795 ba_i[j] += aa[nextaj++]; 4796 } 4797 } 4798 4799 /* add received vals into ba */ 4800 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4801 /* i-th row */ 4802 if (i == *nextrow[k]) { 4803 anzi = *(nextai[k] + 1) - *nextai[k]; 4804 aj = buf_rj[k] + *nextai[k]; 4805 aa = abuf_r[k] + *nextai[k]; 4806 nextaj = 0; 4807 for (j = 0; nextaj < anzi; j++) { 4808 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4809 ba_i[j] += aa[nextaj++]; 4810 } 4811 } 4812 nextrow[k]++; 4813 nextai[k]++; 4814 } 4815 } 4816 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4817 } 4818 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4819 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4820 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4821 4822 PetscCall(PetscFree(abuf_r[0])); 4823 PetscCall(PetscFree(abuf_r)); 4824 PetscCall(PetscFree(ba_i)); 4825 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4826 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4827 PetscFunctionReturn(PETSC_SUCCESS); 4828 } 4829 4830 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4831 { 4832 Mat B_mpi; 4833 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4834 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4835 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4836 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4837 PetscInt len, proc, *dnz, *onz, bs, cbs; 4838 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4839 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4840 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4841 MPI_Status *status; 4842 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4843 PetscBT lnkbt; 4844 Mat_Merge_SeqsToMPI *merge; 4845 PetscContainer container; 4846 4847 
PetscFunctionBegin; 4848 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4849 4850 /* make sure it is a PETSc comm */ 4851 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4852 PetscCallMPI(MPI_Comm_size(comm, &size)); 4853 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4854 4855 PetscCall(PetscNew(&merge)); 4856 PetscCall(PetscMalloc1(size, &status)); 4857 4858 /* determine row ownership */ 4859 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4860 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4861 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4862 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4863 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4864 PetscCall(PetscMalloc1(size, &len_si)); 4865 PetscCall(PetscMalloc1(size, &merge->len_s)); 4866 4867 m = merge->rowmap->n; 4868 owners = merge->rowmap->range; 4869 4870 /* determine the number of messages to send, their lengths */ 4871 len_s = merge->len_s; 4872 4873 len = 0; /* length of buf_si[] */ 4874 merge->nsend = 0; 4875 for (proc = 0; proc < size; proc++) { 4876 len_si[proc] = 0; 4877 if (proc == rank) { 4878 len_s[proc] = 0; 4879 } else { 4880 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4881 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4882 } 4883 if (len_s[proc]) { 4884 merge->nsend++; 4885 nrows = 0; 4886 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4887 if (ai[i + 1] > ai[i]) nrows++; 4888 } 4889 len_si[proc] = 2 * (nrows + 1); 4890 len += len_si[proc]; 4891 } 4892 } 4893 4894 /* determine the number and length of messages to receive for ij-structure */ 4895 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4896 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4897 4898 /* post the Irecv of j-structure */ 4899 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4900 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, 
merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4901 4902 /* post the Isend of j-structure */ 4903 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4904 4905 for (proc = 0, k = 0; proc < size; proc++) { 4906 if (!len_s[proc]) continue; 4907 i = owners[proc]; 4908 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4909 k++; 4910 } 4911 4912 /* receives and sends of j-structure are complete */ 4913 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4914 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4915 4916 /* send and recv i-structure */ 4917 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4918 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4919 4920 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4921 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4922 for (proc = 0, k = 0; proc < size; proc++) { 4923 if (!len_s[proc]) continue; 4924 /* form outgoing message for i-structure: 4925 buf_si[0]: nrows to be sent 4926 [1:nrows]: row index (global) 4927 [nrows+1:2*nrows+1]: i-structure index 4928 */ 4929 nrows = len_si[proc] / 2 - 1; 4930 buf_si_i = buf_si + nrows + 1; 4931 buf_si[0] = nrows; 4932 buf_si_i[0] = 0; 4933 nrows = 0; 4934 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4935 anzi = ai[i + 1] - ai[i]; 4936 if (anzi) { 4937 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4938 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4939 nrows++; 4940 } 4941 } 4942 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4943 k++; 4944 buf_si += len_si[proc]; 4945 } 4946 4947 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4948 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4949 4950 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 
4951 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4952 4953 PetscCall(PetscFree(len_si)); 4954 PetscCall(PetscFree(len_ri)); 4955 PetscCall(PetscFree(rj_waits)); 4956 PetscCall(PetscFree2(si_waits, sj_waits)); 4957 PetscCall(PetscFree(ri_waits)); 4958 PetscCall(PetscFree(buf_s)); 4959 PetscCall(PetscFree(status)); 4960 4961 /* compute a local seq matrix in each processor */ 4962 /* allocate bi array and free space for accumulating nonzero column info */ 4963 PetscCall(PetscMalloc1(m + 1, &bi)); 4964 bi[0] = 0; 4965 4966 /* create and initialize a linked list */ 4967 nlnk = N + 1; 4968 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4969 4970 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4971 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4972 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4973 4974 current_space = free_space; 4975 4976 /* determine symbolic info for each local row */ 4977 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4978 4979 for (k = 0; k < merge->nrecv; k++) { 4980 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4981 nrows = *buf_ri_k[k]; 4982 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4983 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4984 } 4985 4986 MatPreallocateBegin(comm, m, n, dnz, onz); 4987 len = 0; 4988 for (i = 0; i < m; i++) { 4989 bnzi = 0; 4990 /* add local non-zero cols of this proc's seqmat into lnk */ 4991 arow = owners[rank] + i; 4992 anzi = ai[arow + 1] - ai[arow]; 4993 aj = a->j + ai[arow]; 4994 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4995 bnzi += nlnk; 4996 /* add received col data into lnk */ 4997 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4998 if (i == *nextrow[k]) { /* 
i-th row */ 4999 anzi = *(nextai[k] + 1) - *nextai[k]; 5000 aj = buf_rj[k] + *nextai[k]; 5001 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5002 bnzi += nlnk; 5003 nextrow[k]++; 5004 nextai[k]++; 5005 } 5006 } 5007 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5008 5009 /* if free space is not available, make more free space */ 5010 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5011 /* copy data into free space, then initialize lnk */ 5012 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5013 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5014 5015 current_space->array += bnzi; 5016 current_space->local_used += bnzi; 5017 current_space->local_remaining -= bnzi; 5018 5019 bi[i + 1] = bi[i] + bnzi; 5020 } 5021 5022 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5023 5024 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5025 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5026 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5027 5028 /* create symbolic parallel matrix B_mpi */ 5029 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5030 PetscCall(MatCreate(comm, &B_mpi)); 5031 if (n == PETSC_DECIDE) { 5032 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5033 } else { 5034 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5035 } 5036 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5037 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5038 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5039 MatPreallocateEnd(dnz, onz); 5040 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5041 5042 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5043 B_mpi->assembled = PETSC_FALSE; 5044 merge->bi = bi; 5045 merge->bj = bj; 5046 merge->buf_ri = buf_ri; 5047 merge->buf_rj = buf_rj; 5048 merge->coi = NULL; 5049 merge->coj = 
NULL; 5050 merge->owners_co = NULL; 5051 5052 PetscCall(PetscCommDestroy(&comm)); 5053 5054 /* attach the supporting struct to B_mpi for reuse */ 5055 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5056 PetscCall(PetscContainerSetPointer(container, merge)); 5057 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5058 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5059 PetscCall(PetscContainerDestroy(&container)); 5060 *mpimat = B_mpi; 5061 5062 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5063 PetscFunctionReturn(PETSC_SUCCESS); 5064 } 5065 5066 /*@C 5067 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5068 matrices from each processor 5069 5070 Collective 5071 5072 Input Parameters: 5073 + comm - the communicators the parallel matrix will live on 5074 . seqmat - the input sequential matrices 5075 . m - number of local rows (or `PETSC_DECIDE`) 5076 . n - number of local columns (or `PETSC_DECIDE`) 5077 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5078 5079 Output Parameter: 5080 . mpimat - the parallel matrix generated 5081 5082 Level: advanced 5083 5084 Note: 5085 The dimensions of the sequential matrix in each processor MUST be the same. 5086 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5087 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 
5088 5089 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5090 @*/ 5091 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5092 { 5093 PetscMPIInt size; 5094 5095 PetscFunctionBegin; 5096 PetscCallMPI(MPI_Comm_size(comm, &size)); 5097 if (size == 1) { 5098 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5099 if (scall == MAT_INITIAL_MATRIX) { 5100 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5101 } else { 5102 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5103 } 5104 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5105 PetscFunctionReturn(PETSC_SUCCESS); 5106 } 5107 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5108 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5109 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5110 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5111 PetscFunctionReturn(PETSC_SUCCESS); 5112 } 5113 5114 /*@ 5115 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5116 5117 Not Collective 5118 5119 Input Parameter: 5120 . A - the matrix 5121 5122 Output Parameter: 5123 . A_loc - the local sequential matrix generated 5124 5125 Level: developer 5126 5127 Notes: 5128 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5129 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5130 `n` is the global column count obtained with `MatGetSize()` 5131 5132 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5133 5134 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5135 5136 Destroy the matrix with `MatDestroy()` 5137 5138 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5139 @*/ 5140 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5141 { 5142 PetscBool mpi; 5143 5144 PetscFunctionBegin; 5145 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5146 if (mpi) { 5147 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5148 } else { 5149 *A_loc = A; 5150 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5151 } 5152 PetscFunctionReturn(PETSC_SUCCESS); 5153 } 5154 5155 /*@ 5156 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5157 5158 Not Collective 5159 5160 Input Parameters: 5161 + A - the matrix 5162 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5163 5164 Output Parameter: 5165 . A_loc - the local sequential matrix generated 5166 5167 Level: developer 5168 5169 Notes: 5170 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5171 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5172 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5173 5174 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5175 5176 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5177 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5178 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5179 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
5180 5181 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5182 @*/ 5183 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5184 { 5185 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5186 Mat_SeqAIJ *mat, *a, *b; 5187 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5188 const PetscScalar *aa, *ba, *aav, *bav; 5189 PetscScalar *ca, *cam; 5190 PetscMPIInt size; 5191 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5192 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5193 PetscBool match; 5194 5195 PetscFunctionBegin; 5196 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5197 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5198 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5199 if (size == 1) { 5200 if (scall == MAT_INITIAL_MATRIX) { 5201 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5202 *A_loc = mpimat->A; 5203 } else if (scall == MAT_REUSE_MATRIX) { 5204 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5205 } 5206 PetscFunctionReturn(PETSC_SUCCESS); 5207 } 5208 5209 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5210 a = (Mat_SeqAIJ *)mpimat->A->data; 5211 b = (Mat_SeqAIJ *)mpimat->B->data; 5212 ai = a->i; 5213 aj = a->j; 5214 bi = b->i; 5215 bj = b->j; 5216 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5217 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5218 aa = aav; 5219 ba = bav; 5220 if (scall == MAT_INITIAL_MATRIX) { 5221 PetscCall(PetscMalloc1(1 + am, &ci)); 5222 ci[0] = 0; 5223 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5224 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5225 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5226 k = 0; 5227 for (i = 0; i < am; i++) { 5228 ncols_o = bi[i + 1] - bi[i]; 5229 ncols_d = ai[i + 1] - ai[i]; 5230 /* off-diagonal 
portion of A */ 5231 for (jo = 0; jo < ncols_o; jo++) { 5232 col = cmap[*bj]; 5233 if (col >= cstart) break; 5234 cj[k] = col; 5235 bj++; 5236 ca[k++] = *ba++; 5237 } 5238 /* diagonal portion of A */ 5239 for (j = 0; j < ncols_d; j++) { 5240 cj[k] = cstart + *aj++; 5241 ca[k++] = *aa++; 5242 } 5243 /* off-diagonal portion of A */ 5244 for (j = jo; j < ncols_o; j++) { 5245 cj[k] = cmap[*bj++]; 5246 ca[k++] = *ba++; 5247 } 5248 } 5249 /* put together the new matrix */ 5250 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5251 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5252 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5253 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5254 mat->free_a = PETSC_TRUE; 5255 mat->free_ij = PETSC_TRUE; 5256 mat->nonew = 0; 5257 } else if (scall == MAT_REUSE_MATRIX) { 5258 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5259 ci = mat->i; 5260 cj = mat->j; 5261 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5262 for (i = 0; i < am; i++) { 5263 /* off-diagonal portion of A */ 5264 ncols_o = bi[i + 1] - bi[i]; 5265 for (jo = 0; jo < ncols_o; jo++) { 5266 col = cmap[*bj]; 5267 if (col >= cstart) break; 5268 *cam++ = *ba++; 5269 bj++; 5270 } 5271 /* diagonal portion of A */ 5272 ncols_d = ai[i + 1] - ai[i]; 5273 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5274 /* off-diagonal portion of A */ 5275 for (j = jo; j < ncols_o; j++) { 5276 *cam++ = *ba++; 5277 bj++; 5278 } 5279 } 5280 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5281 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5282 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5283 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5284 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5285 PetscFunctionReturn(PETSC_SUCCESS); 5286 } 5287 5288 /*@ 5289 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by 
taking all its local rows and putting them into a sequential matrix with 5290 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5291 5292 Not Collective 5293 5294 Input Parameters: 5295 + A - the matrix 5296 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5297 5298 Output Parameters: 5299 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5300 - A_loc - the local sequential matrix generated 5301 5302 Level: developer 5303 5304 Note: 5305 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5306 part, then those associated with the off-diagonal part (in its local ordering) 5307 5308 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5309 @*/ 5310 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5311 { 5312 Mat Ao, Ad; 5313 const PetscInt *cmap; 5314 PetscMPIInt size; 5315 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5316 5317 PetscFunctionBegin; 5318 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5319 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5320 if (size == 1) { 5321 if (scall == MAT_INITIAL_MATRIX) { 5322 PetscCall(PetscObjectReference((PetscObject)Ad)); 5323 *A_loc = Ad; 5324 } else if (scall == MAT_REUSE_MATRIX) { 5325 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5326 } 5327 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5328 PetscFunctionReturn(PETSC_SUCCESS); 5329 } 5330 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5331 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5332 if (f) { 5333 PetscCall((*f)(A, scall, glob, A_loc)); 5334 } else { 5335 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5336 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5337 Mat_SeqAIJ *c; 5338 PetscInt *ai = a->i, *aj = a->j; 5339 PetscInt *bi = b->i, *bj = b->j; 5340 PetscInt *ci, *cj; 5341 const PetscScalar *aa, *ba; 5342 PetscScalar *ca; 5343 PetscInt i, j, am, dn, on; 5344 5345 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5346 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5347 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5348 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5349 if (scall == MAT_INITIAL_MATRIX) { 5350 PetscInt k; 5351 PetscCall(PetscMalloc1(1 + am, &ci)); 5352 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5353 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5354 ci[0] = 0; 5355 for (i = 0, k = 0; i < am; i++) { 5356 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5357 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5358 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5359 /* diagonal portion of A */ 5360 for (j = 0; j < ncols_d; j++, k++) { 5361 cj[k] = *aj++; 5362 ca[k] = *aa++; 5363 } 5364 /* off-diagonal portion of A */ 5365 for (j = 0; j < ncols_o; j++, k++) { 5366 cj[k] = dn + *bj++; 5367 ca[k] = *ba++; 5368 } 5369 } 5370 /* put together the new matrix */ 5371 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5372 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5373 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5374 c = (Mat_SeqAIJ *)(*A_loc)->data; 5375 c->free_a = PETSC_TRUE; 5376 c->free_ij = PETSC_TRUE; 5377 c->nonew = 0; 5378 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5379 } else if (scall == MAT_REUSE_MATRIX) { 5380 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5381 for (i = 0; i < am; i++) { 5382 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5383 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5384 /* diagonal portion of A */ 5385 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5386 /* off-diagonal portion of A */ 5387 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5388 } 5389 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5390 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5391 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5392 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5393 if (glob) { 5394 PetscInt cst, *gidx; 5395 5396 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5397 PetscCall(PetscMalloc1(dn + on, &gidx)); 5398 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5399 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5400 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5401 } 5402 } 5403 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5404 PetscFunctionReturn(PETSC_SUCCESS); 5405 } 5406 5407 /*@C 5408 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5409 5410 Not Collective 5411 5412 Input Parameters: 5413 + A - the matrix 5414 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5415 . row - index set of rows to extract (or `NULL`) 5416 - col - index set of columns to extract (or `NULL`) 5417 5418 Output Parameter: 5419 . 
A_loc - the local sequential matrix generated 5420 5421 Level: developer 5422 5423 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5424 @*/ 5425 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5426 { 5427 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5428 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5429 IS isrowa, iscola; 5430 Mat *aloc; 5431 PetscBool match; 5432 5433 PetscFunctionBegin; 5434 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5435 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5436 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5437 if (!row) { 5438 start = A->rmap->rstart; 5439 end = A->rmap->rend; 5440 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5441 } else { 5442 isrowa = *row; 5443 } 5444 if (!col) { 5445 start = A->cmap->rstart; 5446 cmap = a->garray; 5447 nzA = a->A->cmap->n; 5448 nzB = a->B->cmap->n; 5449 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5450 ncols = 0; 5451 for (i = 0; i < nzB; i++) { 5452 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5453 else break; 5454 } 5455 imark = i; 5456 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5457 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5458 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5459 } else { 5460 iscola = *col; 5461 } 5462 if (scall != MAT_INITIAL_MATRIX) { 5463 PetscCall(PetscMalloc1(1, &aloc)); 5464 aloc[0] = *A_loc; 5465 } 5466 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5467 if (!col) { /* attach global id of condensed columns */ 5468 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5469 } 5470 *A_loc = aloc[0]; 5471 PetscCall(PetscFree(aloc)); 5472 if (!row) PetscCall(ISDestroy(&isrowa)); 5473 if (!col) 
PetscCall(ISDestroy(&iscola)); 5474 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5475 PetscFunctionReturn(PETSC_SUCCESS); 5476 } 5477 5478 /* 5479 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5480 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5481 * on a global size. 5482 * */ 5483 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5484 { 5485 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5486 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5487 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5488 PetscMPIInt owner; 5489 PetscSFNode *iremote, *oiremote; 5490 const PetscInt *lrowindices; 5491 PetscSF sf, osf; 5492 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5493 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5494 MPI_Comm comm; 5495 ISLocalToGlobalMapping mapping; 5496 const PetscScalar *pd_a, *po_a; 5497 5498 PetscFunctionBegin; 5499 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5500 /* plocalsize is the number of roots 5501 * nrows is the number of leaves 5502 * */ 5503 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5504 PetscCall(ISGetLocalSize(rows, &nrows)); 5505 PetscCall(PetscCalloc1(nrows, &iremote)); 5506 PetscCall(ISGetIndices(rows, &lrowindices)); 5507 for (i = 0; i < nrows; i++) { 5508 /* Find a remote index and an owner for a row 5509 * The row could be local or remote 5510 * */ 5511 owner = 0; 5512 lidx = 0; 5513 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5514 iremote[i].index = lidx; 5515 iremote[i].rank = owner; 5516 } 5517 /* Create SF to communicate how many nonzero columns for each row */ 5518 PetscCall(PetscSFCreate(comm, &sf)); 5519 /* SF will figure out the number of nonzero columns for each row, and their 5520 * offsets 5521 * */ 5522 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5523 PetscCall(PetscSFSetFromOptions(sf)); 5524 PetscCall(PetscSFSetUp(sf)); 5525 5526 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5527 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5528 PetscCall(PetscCalloc1(nrows, &pnnz)); 5529 roffsets[0] = 0; 5530 roffsets[1] = 0; 5531 for (i = 0; i < plocalsize; i++) { 5532 /* diagonal */ 5533 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5534 /* off-diagonal */ 5535 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5536 /* compute offsets so that we relative location for each row */ 5537 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5538 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5539 } 5540 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5541 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5542 /* 'r' means root, and 'l' means leaf */ 5543 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5544 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5545 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5546 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5547 PetscCall(PetscSFDestroy(&sf)); 5548 PetscCall(PetscFree(roffsets)); 5549 PetscCall(PetscFree(nrcols)); 5550 dntotalcols = 0; 5551 ontotalcols = 0; 5552 ncol = 0; 5553 for (i = 0; i < nrows; i++) { 5554 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5555 ncol = PetscMax(pnnz[i], ncol); 5556 /* diagonal */ 5557 dntotalcols += nlcols[i * 2 + 0]; 5558 /* off-diagonal */ 5559 ontotalcols += nlcols[i * 2 + 1]; 5560 } 5561 /* We do not need to figure the right number of columns 5562 * since all the calculations will be done by going through the raw data 5563 * */ 5564 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5565 PetscCall(MatSetUp(*P_oth)); 5566 PetscCall(PetscFree(pnnz)); 5567 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5568 /* diagonal */ 5569 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5570 /* off-diagonal */ 5571 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5572 /* diagonal */ 5573 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5574 /* off-diagonal */ 5575 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5576 dntotalcols = 0; 5577 ontotalcols = 0; 5578 ntotalcols = 0; 5579 for (i = 0; i < nrows; i++) { 5580 owner = 0; 5581 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5582 /* Set iremote for diag matrix */ 5583 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5584 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5585 iremote[dntotalcols].rank = owner; 5586 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5587 ilocal[dntotalcols++] = ntotalcols++; 5588 } 5589 /* off-diagonal */ 5590 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5591 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5592 oiremote[ontotalcols].rank = owner; 5593 oilocal[ontotalcols++] = ntotalcols++; 5594 } 5595 } 5596 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5597 PetscCall(PetscFree(loffsets)); 5598 PetscCall(PetscFree(nlcols)); 5599 PetscCall(PetscSFCreate(comm, &sf)); 5600 /* P serves as roots and P_oth is leaves 5601 * Diag matrix 5602 * */ 5603 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5604 PetscCall(PetscSFSetFromOptions(sf)); 5605 PetscCall(PetscSFSetUp(sf)); 5606 5607 PetscCall(PetscSFCreate(comm, &osf)); 5608 /* off-diagonal */ 5609 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5610 PetscCall(PetscSFSetFromOptions(osf)); 5611 PetscCall(PetscSFSetUp(osf)); 5612 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5613 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5614 /* operate on the matrix internal data to save memory */ 5615 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5616 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5617 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5618 /* Convert to global indices for diag matrix */ 5619 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5620 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5621 /* We want P_oth store global indices */ 5622 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5623 /* Use memory scalable approach */ 5624 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5625 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5626 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5627 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5628 /* Convert back to local indices */ 5629 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5630 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5631 nout = 0; 5632 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5633 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5634 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5635 /* Exchange values */ 5636 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5637 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5638 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5639 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5640 /* Stop PETSc from shrinking memory */ 5641 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5642 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5643 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY)); 5644 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5645 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5646 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5647 PetscCall(PetscSFDestroy(&sf)); 5648 PetscCall(PetscSFDestroy(&osf)); 5649 PetscFunctionReturn(PETSC_SUCCESS); 5650 } 5651 5652 /* 5653 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5654 * This supports MPIAIJ and MAIJ 5655 * */ 5656 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5657 { 5658 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5659 Mat_SeqAIJ *p_oth; 5660 IS rows, map; 5661 PetscHMapI hamp; 5662 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5663 MPI_Comm comm; 5664 PetscSF sf, osf; 5665 PetscBool has; 5666 5667 PetscFunctionBegin; 5668 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5669 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5670 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5671 * and then create a submatrix (that often is an overlapping matrix) 5672 * */ 5673 if (reuse == MAT_INITIAL_MATRIX) { 5674 /* Use a hash table to figure out unique keys */ 5675 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5676 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5677 count = 0; 5678 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5679 for (i = 0; i < a->B->cmap->n; i++) { 5680 key = a->garray[i] / dof; 5681 PetscCall(PetscHMapIHas(hamp, key, &has)); 5682 if (!has) { 5683 mapping[i] = count; 5684 PetscCall(PetscHMapISet(hamp, key, count++)); 5685 } else { 5686 /* Current 'i' has the same value the previous step */ 5687 mapping[i] = count - 1; 5688 } 5689 } 5690 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5691 
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5692 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5693 PetscCall(PetscCalloc1(htsize, &rowindices)); 5694 off = 0; 5695 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5696 PetscCall(PetscHMapIDestroy(&hamp)); 5697 PetscCall(PetscSortInt(htsize, rowindices)); 5698 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5699 /* In case, the matrix was already created but users want to recreate the matrix */ 5700 PetscCall(MatDestroy(P_oth)); 5701 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5702 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5703 PetscCall(ISDestroy(&map)); 5704 PetscCall(ISDestroy(&rows)); 5705 } else if (reuse == MAT_REUSE_MATRIX) { 5706 /* If matrix was already created, we simply update values using SF objects 5707 * that as attached to the matrix earlier. 
5708 */ 5709 const PetscScalar *pd_a, *po_a; 5710 5711 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5712 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5713 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5714 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5715 /* Update values in place */ 5716 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5717 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5718 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5719 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5720 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5721 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5722 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5723 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5724 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5725 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5726 PetscFunctionReturn(PETSC_SUCCESS); 5727 } 5728 5729 /*@C 5730 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5731 5732 Collective 5733 5734 Input Parameters: 5735 + A - the first matrix in `MATMPIAIJ` format 5736 . B - the second matrix in `MATMPIAIJ` format 5737 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5738 5739 Output Parameters: 5740 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5741 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5742 - B_seq - the sequential matrix generated 5743 5744 Level: developer 5745 5746 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5747 @*/ 5748 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5749 { 5750 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5751 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5752 IS isrowb, iscolb; 5753 Mat *bseq = NULL; 5754 5755 PetscFunctionBegin; 5756 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5757 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5758 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5759 5760 if (scall == MAT_INITIAL_MATRIX) { 5761 start = A->cmap->rstart; 5762 cmap = a->garray; 5763 nzA = a->A->cmap->n; 5764 nzB = a->B->cmap->n; 5765 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5766 ncols = 0; 5767 for (i = 0; i < nzB; i++) { /* row < local row index */ 5768 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5769 else break; 5770 } 5771 imark = i; 5772 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5773 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5774 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5775 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5776 } else { 5777 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5778 isrowb = *rowb; 5779 iscolb = *colb; 5780 PetscCall(PetscMalloc1(1, &bseq)); 5781 bseq[0] = *B_seq; 5782 } 5783 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5784 *B_seq = bseq[0]; 5785 PetscCall(PetscFree(bseq)); 5786 if (!rowb) { 5787 
PetscCall(ISDestroy(&isrowb)); 5788 } else { 5789 *rowb = isrowb; 5790 } 5791 if (!colb) { 5792 PetscCall(ISDestroy(&iscolb)); 5793 } else { 5794 *colb = iscolb; 5795 } 5796 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5797 PetscFunctionReturn(PETSC_SUCCESS); 5798 } 5799 5800 /* 5801 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5802 of the OFF-DIAGONAL portion of local A 5803 5804 Collective 5805 5806 Input Parameters: 5807 + A,B - the matrices in `MATMPIAIJ` format 5808 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5809 5810 Output Parameter: 5811 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5812 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5813 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5814 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5815 5816 Developer Note: 5817 This directly accesses information inside the VecScatter associated with the matrix-vector product 5818 for this matrix. This is not desirable.. 
5819 5820 Level: developer 5821 5822 */ 5823 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5824 { 5825 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5826 Mat_SeqAIJ *b_oth; 5827 VecScatter ctx; 5828 MPI_Comm comm; 5829 const PetscMPIInt *rprocs, *sprocs; 5830 const PetscInt *srow, *rstarts, *sstarts; 5831 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5832 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5833 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5834 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5835 PetscMPIInt size, tag, rank, nreqs; 5836 5837 PetscFunctionBegin; 5838 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5839 PetscCallMPI(MPI_Comm_size(comm, &size)); 5840 5841 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5842 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5843 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5844 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5845 5846 if (size == 1) { 5847 startsj_s = NULL; 5848 bufa_ptr = NULL; 5849 *B_oth = NULL; 5850 PetscFunctionReturn(PETSC_SUCCESS); 5851 } 5852 5853 ctx = a->Mvctx; 5854 tag = ((PetscObject)ctx)->tag; 5855 5856 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5857 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5858 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5859 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5860 PetscCall(PetscMalloc1(nreqs, &reqs)); 5861 rwaits = reqs; 5862 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5863 5864 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5865 if (scall == MAT_INITIAL_MATRIX) { 5866 /* i-array */ 5867 /* post receives */ 5868 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5869 for (i = 0; i < nrecvs; i++) { 5870 rowlen = rvalues + rstarts[i] * rbs; 5871 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5872 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5873 } 5874 5875 /* pack the outgoing message */ 5876 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5877 5878 sstartsj[0] = 0; 5879 rstartsj[0] = 0; 5880 len = 0; /* total length of j or a array to be sent */ 5881 if (nsends) { 5882 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5883 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5884 } 5885 for (i = 0; i < nsends; i++) { 5886 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5887 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5888 for (j = 0; j < nrows; j++) { 5889 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5890 for (l = 0; l < sbs; l++) { 5891 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5892 5893 rowlen[j * sbs + l] = ncols; 5894 5895 len += ncols; 5896 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5897 } 5898 k++; 5899 } 5900 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5901 5902 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5903 } 5904 /* recvs and sends of i-array are completed */ 5905 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5906 
PetscCall(PetscFree(svalues)); 5907 5908 /* allocate buffers for sending j and a arrays */ 5909 PetscCall(PetscMalloc1(len + 1, &bufj)); 5910 PetscCall(PetscMalloc1(len + 1, &bufa)); 5911 5912 /* create i-array of B_oth */ 5913 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5914 5915 b_othi[0] = 0; 5916 len = 0; /* total length of j or a array to be received */ 5917 k = 0; 5918 for (i = 0; i < nrecvs; i++) { 5919 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5920 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5921 for (j = 0; j < nrows; j++) { 5922 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5923 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5924 k++; 5925 } 5926 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5927 } 5928 PetscCall(PetscFree(rvalues)); 5929 5930 /* allocate space for j and a arrays of B_oth */ 5931 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5932 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5933 5934 /* j-array */ 5935 /* post receives of j-array */ 5936 for (i = 0; i < nrecvs; i++) { 5937 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5938 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5939 } 5940 5941 /* pack the outgoing message j-array */ 5942 if (nsends) k = sstarts[0]; 5943 for (i = 0; i < nsends; i++) { 5944 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5945 bufJ = bufj + sstartsj[i]; 5946 for (j = 0; j < nrows; j++) { 5947 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5948 for (ll = 0; ll < sbs; ll++) { 5949 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5950 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5951 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5952 } 5953 } 5954 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5955 } 5956 5957 /* recvs 
and sends of j-array are completed */ 5958 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5959 } else if (scall == MAT_REUSE_MATRIX) { 5960 sstartsj = *startsj_s; 5961 rstartsj = *startsj_r; 5962 bufa = *bufa_ptr; 5963 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5964 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5965 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5966 5967 /* a-array */ 5968 /* post receives of a-array */ 5969 for (i = 0; i < nrecvs; i++) { 5970 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5971 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5972 } 5973 5974 /* pack the outgoing message a-array */ 5975 if (nsends) k = sstarts[0]; 5976 for (i = 0; i < nsends; i++) { 5977 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5978 bufA = bufa + sstartsj[i]; 5979 for (j = 0; j < nrows; j++) { 5980 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5981 for (ll = 0; ll < sbs; ll++) { 5982 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5983 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5984 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5985 } 5986 } 5987 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5988 } 5989 /* recvs and sends of a-array are completed */ 5990 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5991 PetscCall(PetscFree(reqs)); 5992 5993 if (scall == MAT_INITIAL_MATRIX) { 5994 /* put together the new matrix */ 5995 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5996 5997 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5998 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5999 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6000 b_oth->free_a = PETSC_TRUE; 6001 b_oth->free_ij = PETSC_TRUE; 6002 b_oth->nonew = 0; 6003 6004 PetscCall(PetscFree(bufj)); 6005 if (!startsj_s || !bufa_ptr) { 6006 PetscCall(PetscFree2(sstartsj, rstartsj)); 6007 PetscCall(PetscFree(bufa_ptr)); 6008 } else { 6009 *startsj_s = sstartsj; 6010 *startsj_r = rstartsj; 6011 *bufa_ptr = bufa; 6012 } 6013 } else if (scall == MAT_REUSE_MATRIX) { 6014 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6015 } 6016 6017 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6018 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6019 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6020 PetscFunctionReturn(PETSC_SUCCESS); 6021 } 6022 6023 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6024 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6026 #if defined(PETSC_HAVE_MKL_SPARSE) 6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6028 #endif 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6031 #if defined(PETSC_HAVE_ELEMENTAL) 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6033 #endif 6034 #if defined(PETSC_HAVE_SCALAPACK) 6035 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6036 #endif 6037 #if defined(PETSC_HAVE_HYPRE) 6038 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6039 #endif 6040 #if defined(PETSC_HAVE_CUDA) 6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6042 #endif 6043 #if defined(PETSC_HAVE_HIP) 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6045 #endif 6046 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6048 #endif 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6050 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6051 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6052 6053 /* 6054 Computes (B'*A')' since computing B*A directly is untenable 6055 6056 n p p 6057 [ ] [ ] [ ] 6058 m [ A ] * n [ B ] = m [ C ] 6059 [ ] [ ] [ ] 6060 6061 */ 6062 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6063 { 6064 Mat At, Bt, Ct; 6065 6066 PetscFunctionBegin; 6067 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6068 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6069 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6070 PetscCall(MatDestroy(&At)); 6071 PetscCall(MatDestroy(&Bt)); 6072 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6073 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6074 PetscCall(MatDestroy(&Ct)); 6075 PetscFunctionReturn(PETSC_SUCCESS); 6076 } 6077 6078 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6079 { 6080 PetscBool cisdense; 6081 6082 PetscFunctionBegin; 6083 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6084 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6085 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6086 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6087 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6088 
PetscCall(MatSetUp(C)); 6089 6090 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6091 PetscFunctionReturn(PETSC_SUCCESS); 6092 } 6093 6094 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6095 { 6096 Mat_Product *product = C->product; 6097 Mat A = product->A, B = product->B; 6098 6099 PetscFunctionBegin; 6100 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6101 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6102 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6103 C->ops->productsymbolic = MatProductSymbolic_AB; 6104 PetscFunctionReturn(PETSC_SUCCESS); 6105 } 6106 6107 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6108 { 6109 Mat_Product *product = C->product; 6110 6111 PetscFunctionBegin; 6112 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6113 PetscFunctionReturn(PETSC_SUCCESS); 6114 } 6115 6116 /* 6117 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6118 6119 Input Parameters: 6120 6121 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6122 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6123 6124 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6125 6126 For Set1, j1[] contains column indices of the nonzeros. 6127 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6128 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6129 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6130 6131 Similar for Set2. 
6132 6133 This routine merges the two sets of nonzeros row by row and removes repeats. 6134 6135 Output Parameters: (memory is allocated by the caller) 6136 6137 i[],j[]: the CSR of the merged matrix, which has m rows. 6138 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6139 imap2[]: similar to imap1[], but for Set2. 6140 Note we order nonzeros row-by-row and from left to right. 6141 */ 6142 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6143 { 6144 PetscInt r, m; /* Row index of mat */ 6145 PetscCount t, t1, t2, b1, e1, b2, e2; 6146 6147 PetscFunctionBegin; 6148 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6149 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6150 i[0] = 0; 6151 for (r = 0; r < m; r++) { /* Do row by row merging */ 6152 b1 = rowBegin1[r]; 6153 e1 = rowEnd1[r]; 6154 b2 = rowBegin2[r]; 6155 e2 = rowEnd2[r]; 6156 while (b1 < e1 && b2 < e2) { 6157 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6158 j[t] = j1[b1]; 6159 imap1[t1] = t; 6160 imap2[t2] = t; 6161 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6162 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6163 t1++; 6164 t2++; 6165 t++; 6166 } else if (j1[b1] < j2[b2]) { 6167 j[t] = j1[b1]; 6168 imap1[t1] = t; 6169 b1 += jmap1[t1 + 1] - jmap1[t1]; 6170 t1++; 6171 t++; 6172 } else { 6173 j[t] = j2[b2]; 6174 imap2[t2] = t; 6175 b2 += jmap2[t2 + 1] - jmap2[t2]; 6176 t2++; 6177 t++; 6178 } 6179 } 6180 /* Merge the remaining in either j1[] or j2[] */ 6181 while (b1 < e1) { 6182 j[t] = j1[b1]; 6183 imap1[t1] = t; 6184 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6185 t1++; 6186 t++; 6187 } 6188 while (b2 < e2) { 6189 j[t] = j2[b2]; 6190 imap2[t2] = t; 6191 b2 += jmap2[t2 + 1] - jmap2[t2]; 6192 t2++; 6193 t++; 6194 } 6195 i[r + 1] = t; 6196 } 6197 PetscFunctionReturn(PETSC_SUCCESS); 6198 } 6199 6200 /* 6201 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6202 6203 Input Parameters: 6204 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6205 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6206 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6207 6208 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6209 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6210 6211 Output Parameters: 6212 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6213 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6214 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6215 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6216 6217 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6218 Atot: number of entries belonging to the diagonal block. 6219 Annz: number of unique nonzeros belonging to the diagonal block. 6220 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6221 repeats (i.e., same 'i,j' pair). 6222 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6223 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6224 6225 Atot: number of entries belonging to the diagonal block 6226 Annz: number of unique nonzeros belonging to the diagonal block. 6227 6228 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6229 6230 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6231 */ 6232 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6233 { 6234 PetscInt cstart, cend, rstart, rend, row, col; 6235 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6236 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6237 PetscCount k, m, p, q, r, s, mid; 6238 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6239 6240 PetscFunctionBegin; 6241 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6242 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6243 m = rend - rstart; 6244 6245 /* Skip negative rows */ 6246 for (k = 0; k < n; k++) 6247 if (i[k] >= 0) break; 6248 6249 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6250 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6251 */ 6252 while (k < n) { 6253 row = i[k]; 6254 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6255 for (s = k; s < n; s++) 6256 if (i[s] != row) break; 6257 6258 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6259 for (p = k; p < s; p++) { 6260 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6261 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6262 } 6263 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6264 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6265 rowBegin[row - rstart] = k; 6266 rowMid[row - rstart] = mid; 6267 rowEnd[row - rstart] = s; 6268 6269 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6270 Atot += mid - k; 6271 Btot += s - mid; 6272 6273 /* Count unique nonzeros of this diag row */ 6274 for (p = k; p < mid;) { 6275 col = j[p]; 6276 do { 6277 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6278 p++; 6279 } while (p < mid && j[p] == col); 6280 Annz++; 6281 } 6282 6283 /* Count unique nonzeros of this offdiag row */ 6284 for (p = mid; p < s;) { 6285 col = j[p]; 6286 do { 6287 p++; 6288 } while (p < s && j[p] == col); 6289 Bnnz++; 6290 } 6291 k = s; 6292 } 6293 6294 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6295 PetscCall(PetscMalloc1(Atot, &Aperm)); 6296 PetscCall(PetscMalloc1(Btot, &Bperm)); 6297 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6298 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6299 6300 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6301 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6302 for (r = 0; r < m; r++) { 6303 k = rowBegin[r]; 6304 mid = rowMid[r]; 6305 s = rowEnd[r]; 6306 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6307 
PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6308 Atot += mid - k; 6309 Btot += s - mid; 6310 6311 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6312 for (p = k; p < mid;) { 6313 col = j[p]; 6314 q = p; 6315 do { 6316 p++; 6317 } while (p < mid && j[p] == col); 6318 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6319 Annz++; 6320 } 6321 6322 for (p = mid; p < s;) { 6323 col = j[p]; 6324 q = p; 6325 do { 6326 p++; 6327 } while (p < s && j[p] == col); 6328 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6329 Bnnz++; 6330 } 6331 } 6332 /* Output */ 6333 *Aperm_ = Aperm; 6334 *Annz_ = Annz; 6335 *Atot_ = Atot; 6336 *Ajmap_ = Ajmap; 6337 *Bperm_ = Bperm; 6338 *Bnnz_ = Bnnz; 6339 *Btot_ = Btot; 6340 *Bjmap_ = Bjmap; 6341 PetscFunctionReturn(PETSC_SUCCESS); 6342 } 6343 6344 /* 6345 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6346 6347 Input Parameters: 6348 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6349 nnz: number of unique nonzeros in the merged matrix 6350 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6351 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6352 6353 Output Parameter: (memory is allocated by the caller) 6354 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6355 6356 Example: 6357 nnz1 = 4 6358 nnz = 6 6359 imap = [1,3,4,5] 6360 jmap = [0,3,5,6,7] 6361 then, 6362 jmap_new = [0,0,3,3,5,6,7] 6363 */ 6364 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6365 { 6366 PetscCount k, p; 6367 6368 PetscFunctionBegin; 6369 jmap_new[0] = 0; 6370 p = nnz; /* p loops over jmap_new[] backwards */ 6371 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6372 for (; p > 
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6373 } 6374 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6375 PetscFunctionReturn(PETSC_SUCCESS); 6376 } 6377 6378 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6379 { 6380 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6381 6382 PetscFunctionBegin; 6383 PetscCall(PetscSFDestroy(&coo->sf)); 6384 PetscCall(PetscFree(coo->Aperm1)); 6385 PetscCall(PetscFree(coo->Bperm1)); 6386 PetscCall(PetscFree(coo->Ajmap1)); 6387 PetscCall(PetscFree(coo->Bjmap1)); 6388 PetscCall(PetscFree(coo->Aimap2)); 6389 PetscCall(PetscFree(coo->Bimap2)); 6390 PetscCall(PetscFree(coo->Aperm2)); 6391 PetscCall(PetscFree(coo->Bperm2)); 6392 PetscCall(PetscFree(coo->Ajmap2)); 6393 PetscCall(PetscFree(coo->Bjmap2)); 6394 PetscCall(PetscFree(coo->Cperm1)); 6395 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6396 PetscCall(PetscFree(coo)); 6397 PetscFunctionReturn(PETSC_SUCCESS); 6398 } 6399 6400 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6401 { 6402 MPI_Comm comm; 6403 PetscMPIInt rank, size; 6404 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6405 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6406 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6407 PetscContainer container; 6408 MatCOOStruct_MPIAIJ *coo; 6409 6410 PetscFunctionBegin; 6411 PetscCall(PetscFree(mpiaij->garray)); 6412 PetscCall(VecDestroy(&mpiaij->lvec)); 6413 #if defined(PETSC_USE_CTABLE) 6414 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6415 #else 6416 PetscCall(PetscFree(mpiaij->colmap)); 6417 #endif 6418 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6419 mat->assembled = PETSC_FALSE; 6420 mat->was_assembled = PETSC_FALSE; 6421 6422 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6423 PetscCallMPI(MPI_Comm_size(comm, &size)); 6424 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6425 PetscCall(PetscLayoutSetUp(mat->rmap)); 
6426 PetscCall(PetscLayoutSetUp(mat->cmap)); 6427 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6428 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6429 PetscCall(MatGetLocalSize(mat, &m, &n)); 6430 PetscCall(MatGetSize(mat, &M, &N)); 6431 6432 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6433 /* entries come first, then local rows, then remote rows. */ 6434 PetscCount n1 = coo_n, *perm1; 6435 PetscInt *i1 = coo_i, *j1 = coo_j; 6436 6437 PetscCall(PetscMalloc1(n1, &perm1)); 6438 for (k = 0; k < n1; k++) perm1[k] = k; 6439 6440 /* Manipulate indices so that entries with negative row or col indices will have smallest 6441 row indices, local entries will have greater but negative row indices, and remote entries 6442 will have positive row indices. 6443 */ 6444 for (k = 0; k < n1; k++) { 6445 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6446 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6447 else { 6448 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6449 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6450 } 6451 } 6452 6453 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6454 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6455 6456 /* Advance k to the first entry we need to take care of */ 6457 for (k = 0; k < n1; k++) 6458 if (i1[k] > PETSC_MIN_INT) break; 6459 PetscInt i1start = k; 6460 6461 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6462 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6463 6464 /* Send remote rows 
to their owner */ 6465 /* Find which rows should be sent to which remote ranks*/ 6466 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6467 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6468 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6469 const PetscInt *ranges; 6470 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6471 6472 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6473 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6474 for (k = rem; k < n1;) { 6475 PetscMPIInt owner; 6476 PetscInt firstRow, lastRow; 6477 6478 /* Locate a row range */ 6479 firstRow = i1[k]; /* first row of this owner */ 6480 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6481 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6482 6483 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6484 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6485 6486 /* All entries in [k,p) belong to this remote owner */ 6487 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6488 PetscMPIInt *sendto2; 6489 PetscInt *nentries2; 6490 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6491 6492 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6493 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6494 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6495 PetscCall(PetscFree2(sendto, nentries2)); 6496 sendto = sendto2; 6497 nentries = nentries2; 6498 maxNsend = maxNsend2; 6499 } 6500 sendto[nsend] = owner; 6501 nentries[nsend] = p - k; 6502 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6503 nsend++; 6504 k = p; 6505 } 6506 6507 /* Build 1st SF to know offsets on remote to send data */ 6508 PetscSF sf1; 6509 PetscInt nroots = 1, nroots2 = 0; 6510 PetscInt nleaves = nsend, nleaves2 = 0; 6511 PetscInt *offsets; 6512 PetscSFNode *iremote; 6513 6514 PetscCall(PetscSFCreate(comm, &sf1)); 6515 PetscCall(PetscMalloc1(nsend, &iremote)); 6516 PetscCall(PetscMalloc1(nsend, &offsets)); 6517 for (k = 0; k < nsend; k++) { 6518 iremote[k].rank = sendto[k]; 6519 iremote[k].index = 0; 6520 nleaves2 += nentries[k]; 6521 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6522 } 6523 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6524 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6525 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6526 PetscCall(PetscSFDestroy(&sf1)); 6527 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6528 6529 /* Build 2nd SF to send remote COOs to their owner */ 6530 PetscSF sf2; 6531 nroots = nroots2; 6532 nleaves = nleaves2; 6533 PetscCall(PetscSFCreate(comm, &sf2)); 6534 
PetscCall(PetscSFSetFromOptions(sf2)); 6535 PetscCall(PetscMalloc1(nleaves, &iremote)); 6536 p = 0; 6537 for (k = 0; k < nsend; k++) { 6538 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6539 for (q = 0; q < nentries[k]; q++, p++) { 6540 iremote[p].rank = sendto[k]; 6541 iremote[p].index = offsets[k] + q; 6542 } 6543 } 6544 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6545 6546 /* Send the remote COOs to their owner */ 6547 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6548 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6549 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6550 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6551 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6552 PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */ 6553 PetscInt *j1prem = j1 ? 
j1 + rem : NULL; 6554 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6555 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6556 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6558 6559 PetscCall(PetscFree(offsets)); 6560 PetscCall(PetscFree2(sendto, nentries)); 6561 6562 /* Sort received COOs by row along with the permutation array */ 6563 for (k = 0; k < n2; k++) perm2[k] = k; 6564 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6565 6566 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6567 PetscCount *Cperm1; 6568 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6569 PetscCount *perm1prem = perm1 ? perm1 + rem : NULL; 6570 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6571 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6572 6573 /* Support for HYPRE matrices, kind of a hack. 
6574 Swap min column with diagonal so that diagonal values will go first */ 6575 PetscBool hypre; 6576 const char *name; 6577 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6578 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6579 if (hypre) { 6580 PetscInt *minj; 6581 PetscBT hasdiag; 6582 6583 PetscCall(PetscBTCreate(m, &hasdiag)); 6584 PetscCall(PetscMalloc1(m, &minj)); 6585 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6586 for (k = i1start; k < rem; k++) { 6587 if (j1[k] < cstart || j1[k] >= cend) continue; 6588 const PetscInt rindex = i1[k] - rstart; 6589 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6590 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6591 } 6592 for (k = 0; k < n2; k++) { 6593 if (j2[k] < cstart || j2[k] >= cend) continue; 6594 const PetscInt rindex = i2[k] - rstart; 6595 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6596 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6597 } 6598 for (k = i1start; k < rem; k++) { 6599 const PetscInt rindex = i1[k] - rstart; 6600 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6601 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6602 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6603 } 6604 for (k = 0; k < n2; k++) { 6605 const PetscInt rindex = i2[k] - rstart; 6606 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6607 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6608 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6609 } 6610 PetscCall(PetscBTDestroy(&hasdiag)); 6611 PetscCall(PetscFree(minj)); 6612 } 6613 6614 /* Split local COOs and received COOs into diag/offdiag portions */ 6615 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6616 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6617 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6618 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6619 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6620 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6621 6622 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6623 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6624 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6625 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6626 6627 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6628 PetscInt *Ai, *Bi; 6629 PetscInt *Aj, *Bj; 6630 6631 PetscCall(PetscMalloc1(m + 1, &Ai)); 6632 PetscCall(PetscMalloc1(m + 1, &Bi)); 6633 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6634 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6635 6636 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6637 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6638 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6639 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6640 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6641 6642 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6643 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6644 6645 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6646 /* expect nonzeros in A/B most likely have local contributing entries */ 6647 PetscInt Annz = Ai[m]; 6648 PetscInt Bnnz = Bi[m]; 6649 PetscCount *Ajmap1_new, *Bjmap1_new; 6650 6651 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6652 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6653 6654 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6655 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6656 
6657 PetscCall(PetscFree(Aimap1)); 6658 PetscCall(PetscFree(Ajmap1)); 6659 PetscCall(PetscFree(Bimap1)); 6660 PetscCall(PetscFree(Bjmap1)); 6661 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6662 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6663 PetscCall(PetscFree(perm1)); 6664 PetscCall(PetscFree3(i2, j2, perm2)); 6665 6666 Ajmap1 = Ajmap1_new; 6667 Bjmap1 = Bjmap1_new; 6668 6669 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6670 if (Annz < Annz1 + Annz2) { 6671 PetscInt *Aj_new; 6672 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6673 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6674 PetscCall(PetscFree(Aj)); 6675 Aj = Aj_new; 6676 } 6677 6678 if (Bnnz < Bnnz1 + Bnnz2) { 6679 PetscInt *Bj_new; 6680 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6681 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6682 PetscCall(PetscFree(Bj)); 6683 Bj = Bj_new; 6684 } 6685 6686 /* Create new submatrices for on-process and off-process coupling */ 6687 PetscScalar *Aa, *Ba; 6688 MatType rtype; 6689 Mat_SeqAIJ *a, *b; 6690 PetscObjectState state; 6691 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6692 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6693 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6694 if (cstart) { 6695 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6696 } 6697 6698 PetscCall(MatGetRootType_Private(mat, &rtype)); 6699 6700 MatSeqXAIJGetOptions_Private(mpiaij->A); 6701 PetscCall(MatDestroy(&mpiaij->A)); 6702 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6703 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6704 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6705 6706 MatSeqXAIJGetOptions_Private(mpiaij->B); 6707 PetscCall(MatDestroy(&mpiaij->B)); 6708 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6709 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6710 
MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6711 6712 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6713 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6714 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6715 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6716 6717 a = (Mat_SeqAIJ *)mpiaij->A->data; 6718 b = (Mat_SeqAIJ *)mpiaij->B->data; 6719 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6720 a->free_a = b->free_a = PETSC_TRUE; 6721 a->free_ij = b->free_ij = PETSC_TRUE; 6722 6723 /* conversion must happen AFTER multiply setup */ 6724 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6725 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6726 PetscCall(VecDestroy(&mpiaij->lvec)); 6727 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6728 6729 // Put the COO struct in a container and then attach that to the matrix 6730 PetscCall(PetscMalloc1(1, &coo)); 6731 coo->n = coo_n; 6732 coo->sf = sf2; 6733 coo->sendlen = nleaves; 6734 coo->recvlen = nroots; 6735 coo->Annz = Annz; 6736 coo->Bnnz = Bnnz; 6737 coo->Annz2 = Annz2; 6738 coo->Bnnz2 = Bnnz2; 6739 coo->Atot1 = Atot1; 6740 coo->Atot2 = Atot2; 6741 coo->Btot1 = Btot1; 6742 coo->Btot2 = Btot2; 6743 coo->Ajmap1 = Ajmap1; 6744 coo->Aperm1 = Aperm1; 6745 coo->Bjmap1 = Bjmap1; 6746 coo->Bperm1 = Bperm1; 6747 coo->Aimap2 = Aimap2; 6748 coo->Ajmap2 = Ajmap2; 6749 coo->Aperm2 = Aperm2; 6750 coo->Bimap2 = Bimap2; 6751 coo->Bjmap2 = Bjmap2; 6752 coo->Bperm2 = Bperm2; 6753 coo->Cperm1 = Cperm1; 6754 // Allocate in preallocation. 
If not used, it has zero cost on host 6755 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6756 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6757 PetscCall(PetscContainerSetPointer(container, coo)); 6758 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6759 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6760 PetscCall(PetscContainerDestroy(&container)); 6761 PetscFunctionReturn(PETSC_SUCCESS); 6762 } 6763 6764 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6765 { 6766 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6767 Mat A = mpiaij->A, B = mpiaij->B; 6768 PetscScalar *Aa, *Ba; 6769 PetscScalar *sendbuf, *recvbuf; 6770 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6771 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6772 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6773 const PetscCount *Cperm1; 6774 PetscContainer container; 6775 MatCOOStruct_MPIAIJ *coo; 6776 6777 PetscFunctionBegin; 6778 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6779 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6780 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6781 sendbuf = coo->sendbuf; 6782 recvbuf = coo->recvbuf; 6783 Ajmap1 = coo->Ajmap1; 6784 Ajmap2 = coo->Ajmap2; 6785 Aimap2 = coo->Aimap2; 6786 Bjmap1 = coo->Bjmap1; 6787 Bjmap2 = coo->Bjmap2; 6788 Bimap2 = coo->Bimap2; 6789 Aperm1 = coo->Aperm1; 6790 Aperm2 = coo->Aperm2; 6791 Bperm1 = coo->Bperm1; 6792 Bperm2 = coo->Bperm2; 6793 Cperm1 = coo->Cperm1; 6794 6795 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6796 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6797 6798 /* Pack entries to be sent to remote */ 6799 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 
6800 6801 /* Send remote entries to their owner and overlap the communication with local computation */ 6802 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6803 /* Add local entries to A and B */ 6804 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6805 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6806 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6807 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6808 } 6809 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6810 PetscScalar sum = 0.0; 6811 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6812 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6813 } 6814 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6815 6816 /* Add received remote entries to A and B */ 6817 for (PetscCount i = 0; i < coo->Annz2; i++) { 6818 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6819 } 6820 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6821 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6822 } 6823 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6824 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6825 PetscFunctionReturn(PETSC_SUCCESS); 6826 } 6827 6828 /*MC 6829 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6830 6831 Options Database Keys: 6832 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6833 6834 Level: beginner 6835 6836 Notes: 6837 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6838 in this case the values associated with the rows and columns one passes in are set to zero 6839 in the matrix 6840 6841 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6842 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6843 6844 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6845 M*/ 6846 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6847 { 6848 Mat_MPIAIJ *b; 6849 PetscMPIInt size; 6850 6851 PetscFunctionBegin; 6852 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6853 6854 PetscCall(PetscNew(&b)); 6855 B->data = (void *)b; 6856 B->ops[0] = MatOps_Values; 6857 B->assembled = PETSC_FALSE; 6858 B->insertmode = NOT_SET_VALUES; 6859 b->size = size; 6860 6861 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6862 6863 /* build cache for off array entries formed */ 6864 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6865 6866 b->donotstash = PETSC_FALSE; 6867 b->colmap = NULL; 6868 b->garray = NULL; 6869 b->roworiented = PETSC_TRUE; 6870 6871 /* stuff used for matrix vector multiply */ 6872 b->lvec = NULL; 6873 b->Mvctx = NULL; 6874 6875 /* stuff for MatGetRow() */ 6876 b->rowindices = NULL; 6877 b->rowvalues = NULL; 6878 b->getrowactive = PETSC_FALSE; 6879 6880 /* flexible pointer used in CUSPARSE classes */ 6881 b->spptr = NULL; 6882 6883 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6884 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6885 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6886 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6893 #if defined(PETSC_HAVE_CUDA) 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6895 #endif 6896 #if defined(PETSC_HAVE_HIP) 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6898 #endif 6899 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6901 #endif 6902 #if defined(PETSC_HAVE_MKL_SPARSE) 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6904 #endif 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", 
MatConvert_MPIAIJ_MPISBAIJ)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6909 #if defined(PETSC_HAVE_ELEMENTAL) 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6911 #endif 6912 #if defined(PETSC_HAVE_SCALAPACK) 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6914 #endif 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6917 #if defined(PETSC_HAVE_HYPRE) 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6920 #endif 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6925 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6926 PetscFunctionReturn(PETSC_SUCCESS); 6927 } 6928 6929 /*@C 6930 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6931 and "off-diagonal" part of the matrix in CSR format. 6932 6933 Collective 6934 6935 Input Parameters: 6936 + comm - MPI communicator 6937 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6938 . 
n - This value should be the same as the local size used in creating the 6939 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6940 calculated if `N` is given) For square matrices `n` is almost always `m`. 6941 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6942 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6943 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6944 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6945 . a - matrix values 6946 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6947 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6948 - oa - matrix values 6949 6950 Output Parameter: 6951 . mat - the matrix 6952 6953 Level: advanced 6954 6955 Notes: 6956 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6957 must free the arrays once the matrix has been destroyed and not before. 6958 6959 The `i` and `j` indices are 0 based 6960 6961 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6962 6963 This sets local rows and cannot be used to set off-processor values. 6964 6965 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6966 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6967 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6968 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6969 keep track of the underlying array. 
Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6970 communication if it is known that only local entries will be set. 6971 6972 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6973 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6974 @*/ 6975 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6976 { 6977 Mat_MPIAIJ *maij; 6978 6979 PetscFunctionBegin; 6980 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6981 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6982 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6983 PetscCall(MatCreate(comm, mat)); 6984 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6985 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6986 maij = (Mat_MPIAIJ *)(*mat)->data; 6987 6988 (*mat)->preallocated = PETSC_TRUE; 6989 6990 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6991 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6992 6993 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6994 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6995 6996 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6997 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6998 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6999 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7000 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7001 PetscFunctionReturn(PETSC_SUCCESS); 7002 } 7003 7004 typedef struct { 7005 Mat *mp; /* intermediate products */ 7006 PetscBool 
*mptmp; /* is the intermediate product temporary ? */ 7007 PetscInt cp; /* number of intermediate products */ 7008 7009 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7010 PetscInt *startsj_s, *startsj_r; 7011 PetscScalar *bufa; 7012 Mat P_oth; 7013 7014 /* may take advantage of merging product->B */ 7015 Mat Bloc; /* B-local by merging diag and off-diag */ 7016 7017 /* cusparse does not have support to split between symbolic and numeric phases. 7018 When api_user is true, we don't need to update the numerical values 7019 of the temporary storage */ 7020 PetscBool reusesym; 7021 7022 /* support for COO values insertion */ 7023 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7024 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7025 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7026 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7027 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7028 PetscMemType mtype; 7029 7030 /* customization */ 7031 PetscBool abmerge; 7032 PetscBool P_oth_bind; 7033 } MatMatMPIAIJBACKEND; 7034 7035 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7036 { 7037 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7038 PetscInt i; 7039 7040 PetscFunctionBegin; 7041 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7042 PetscCall(PetscFree(mmdata->bufa)); 7043 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7044 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7045 PetscCall(MatDestroy(&mmdata->P_oth)); 7046 PetscCall(MatDestroy(&mmdata->Bloc)); 7047 PetscCall(PetscSFDestroy(&mmdata->sf)); 7048 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7049 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7050 PetscCall(PetscFree(mmdata->own[0])); 7051 PetscCall(PetscFree(mmdata->own)); 7052 PetscCall(PetscFree(mmdata->off[0])); 7053 PetscCall(PetscFree(mmdata->off)); 7054 PetscCall(PetscFree(mmdata)); 7055 PetscFunctionReturn(PETSC_SUCCESS); 7056 } 7057 7058 /* Copy selected n entries with indices in idx[] of A to v[]. 
7059 If idx is NULL, copy the whole data array of A to v[] 7060 */ 7061 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7062 { 7063 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7064 7065 PetscFunctionBegin; 7066 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7067 if (f) { 7068 PetscCall((*f)(A, n, idx, v)); 7069 } else { 7070 const PetscScalar *vv; 7071 7072 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7073 if (n && idx) { 7074 PetscScalar *w = v; 7075 const PetscInt *oi = idx; 7076 PetscInt j; 7077 7078 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7079 } else { 7080 PetscCall(PetscArraycpy(v, vv, n)); 7081 } 7082 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7083 } 7084 PetscFunctionReturn(PETSC_SUCCESS); 7085 } 7086 7087 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7088 { 7089 MatMatMPIAIJBACKEND *mmdata; 7090 PetscInt i, n_d, n_o; 7091 7092 PetscFunctionBegin; 7093 MatCheckProduct(C, 1); 7094 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7095 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7096 if (!mmdata->reusesym) { /* update temporary matrices */ 7097 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7098 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7099 } 7100 mmdata->reusesym = PETSC_FALSE; 7101 7102 for (i = 0; i < mmdata->cp; i++) { 7103 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7104 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7105 } 7106 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7107 PetscInt noff = mmdata->off[i + 
1] - mmdata->off[i]; 7108 7109 if (mmdata->mptmp[i]) continue; 7110 if (noff) { 7111 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7112 7113 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7114 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7115 n_o += noff; 7116 n_d += nown; 7117 } else { 7118 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7119 7120 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7121 n_d += mm->nz; 7122 } 7123 } 7124 if (mmdata->hasoffproc) { /* offprocess insertion */ 7125 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7126 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7127 } 7128 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7129 PetscFunctionReturn(PETSC_SUCCESS); 7130 } 7131 7132 /* Support for Pt * A, A * P, or Pt * A * P */ 7133 #define MAX_NUMBER_INTERMEDIATE 4 7134 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7135 { 7136 Mat_Product *product = C->product; 7137 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7138 Mat_MPIAIJ *a, *p; 7139 MatMatMPIAIJBACKEND *mmdata; 7140 ISLocalToGlobalMapping P_oth_l2g = NULL; 7141 IS glob = NULL; 7142 const char *prefix; 7143 char pprefix[256]; 7144 const PetscInt *globidx, *P_oth_idx; 7145 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7146 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7147 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/
  /* type-0: consecutive, start from 0; type-1: consecutive with */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* A^T * B with A flagged symmetric is handled as A * B; the shortcut is recorded on the product */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* pick the operand roles (A, P) and the local (m, n) / global (M, N) sizes of C for each product type */
  switch (ptype) {
  case MATPRODUCT_AB:
    A = product->A;
    P = product->B;
    m = A->rmap->n;
    n = P->cmap->n;
    M = A->rmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P = product->A;
    A = product->B;
    m = P->cmap->n;
    n = A->cmap->n;
    M = P->cmap->N;
    N = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A = product->A;
    P = product->B;
    m = P->cmap->n;
    n = P->cmap->n;
    M = P->cmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* a single process has nobody to send values to */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i] = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization: option names depend on the product type and on whether the user-level API was used */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user; /* tested by MatProductNumeric_MPIAIJBACKEND() before it refreshes P_oth and Bloc */
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  /* C gets the sizes selected above and the same type as A */
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /*
     Create the intermediate products and run their symbolic phase. Every product gets the options
     prefix of C followed by "backend_p<k>_", and records in rmapt/cmapt (and rmapa/cmapa for type-2)
     how its local row/column indices translate to global indices of C.
  */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      /* P_diag^T * A_loc: rows are the owned rows of C */
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      /* P_off^T * A_loc: rows are mapped to global rows through p->garray */
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    /* P_loc^T * A_diag * P_loc */
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* mp[1] = A_off * P_oth; flagged as temporary (mptmp), it only feeds the next product */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      /* P_loc^T * mp[1] */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check: in parallel, a sparse (type-2) row map is only expected for products flagged hasoffproc */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  /* store the intermediate products in the product data and install the destroy/numeric callbacks on C */
  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i] = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp = cp;
  C->product->data = mmdata;
  C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type: host unless the type of C matches one of the device matrix types below */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr = mp[cp]->rmap->n;
      const PetscInt rs = C->rmap->rstart;
      const PetscInt re = C->rmap->rend;
      const PetscInt *ii = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz; /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    /* ncoo_o is reset here and recounted while the offproc (i,j) pairs are written */
    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt *idxoff = mmdata->off[cp];
      PetscInt *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii = mm->i;
        PetscInt *coi = coo_i + ncoo_o;
        PetscInt *coj = coo_j + ncoo_o;
        const PetscInt mr = mp[cp]->rmap->n;
        const PetscInt rs = C->rmap->rstart;
        const PetscInt re = C->rmap->rend;
        const PetscInt cs = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr = rmap[i];
          const PetscInt nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++ = gr;
              *idxoff++ = j; /* position of the value inside mp[cp]'s value array */
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* the next segment starts where this one ended (csr-like layout) */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    /* from here on coo_i/coo_j refer to the full-size arrays */
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an SF with an empty graph is still created; it is passed to PetscSFMalloc() further down */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  /* ncoo_d is reused as the running write offset into coo_i/coo_j */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt *coi = coo_i + ncoo_d;
    PetscInt *coj = coo_j + ncoo_d;
    const PetscInt *jj = mm->j;
    const PetscInt *ii = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr = mp[cp]->rmap->n;
    const PetscInt rs = C->rmap->rstart;
    const PetscInt re = C->rmap->rend;
    const PetscInt cs = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  /* the index arrays borrowed for cmapa/rmapa are no longer needed */
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype,
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7622 7623 /* preallocate with COO data */ 7624 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7625 PetscCall(PetscFree2(coo_i, coo_j)); 7626 PetscFunctionReturn(PETSC_SUCCESS); 7627 } 7628 7629 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7630 { 7631 Mat_Product *product = mat->product; 7632 #if defined(PETSC_HAVE_DEVICE) 7633 PetscBool match = PETSC_FALSE; 7634 PetscBool usecpu = PETSC_FALSE; 7635 #else 7636 PetscBool match = PETSC_TRUE; 7637 #endif 7638 7639 PetscFunctionBegin; 7640 MatCheckProduct(mat, 1); 7641 #if defined(PETSC_HAVE_DEVICE) 7642 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7643 if (match) { /* we can always fallback to the CPU if requested */ 7644 switch (product->type) { 7645 case MATPRODUCT_AB: 7646 if (product->api_user) { 7647 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7648 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7649 PetscOptionsEnd(); 7650 } else { 7651 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7652 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7653 PetscOptionsEnd(); 7654 } 7655 break; 7656 case MATPRODUCT_AtB: 7657 if (product->api_user) { 7658 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7659 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7660 PetscOptionsEnd(); 7661 } else { 7662 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7663 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7664 PetscOptionsEnd(); 7665 } 7666 break; 7667 case MATPRODUCT_PtAP: 7668 if (product->api_user) { 7669 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7670 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7671 PetscOptionsEnd(); 7672 } else { 7673 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7674 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7675 PetscOptionsEnd(); 7676 } 7677 break; 7678 default: 7679 break; 7680 } 7681 match = (PetscBool)!usecpu; 7682 } 7683 #endif 7684 if (match) { 7685 switch (product->type) { 7686 case MATPRODUCT_AB: 7687 case MATPRODUCT_AtB: 7688 case MATPRODUCT_PtAP: 7689 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7690 break; 7691 default: 7692 break; 7693 } 7694 } 7695 /* fallback to MPIAIJ ops */ 7696 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7697 PetscFunctionReturn(PETSC_SUCCESS); 7698 } 7699 7700 /* 7701 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7702 7703 n - the number of block indices in cc[] 7704 cc - the block indices (must be large enough to contain the indices) 7705 */ 7706 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7707 { 7708 PetscInt cnt = -1, nidx, j; 7709 const PetscInt *idx; 7710 7711 PetscFunctionBegin; 7712 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7713 if (nidx) { 7714 cnt = 0; 7715 cc[cnt] = idx[0] / bs; 7716 for (j = 1; j < nidx; j++) { 7717 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7718 } 7719 } 7720 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7721 *n = cnt + 1; 7722 PetscFunctionReturn(PETSC_SUCCESS); 7723 } 7724 7725 /* 7726 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7727 7728 ncollapsed - the number of block indices 7729 collapsed - the block indices (must be large enough to contain the indices) 7730 */ 7731 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7732 { 7733 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7734 7735 PetscFunctionBegin; 7736 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7737 for (i = start + 1; i < start + bs; i++) { 7738 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7739 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7740 cprevtmp = cprev; 7741 cprev = merged; 7742 merged = cprevtmp; 7743 } 7744 *ncollapsed = nprev; 7745 if (collapsed) *collapsed = cprev; 7746 PetscFunctionReturn(PETSC_SUCCESS); 7747 } 7748 7749 /* 7750 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7751 7752 Input Parameter: 7753 . Amat - matrix 7754 - symmetrize - make the result symmetric 7755 + scale - scale with diagonal 7756 7757 Output Parameter: 7758 . 
a_Gmat - output scalar graph >= 0 7759 7760 */ 7761 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7762 { 7763 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7764 MPI_Comm comm; 7765 Mat Gmat; 7766 PetscBool ismpiaij, isseqaij; 7767 Mat a, b, c; 7768 MatType jtype; 7769 7770 PetscFunctionBegin; 7771 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7772 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7773 PetscCall(MatGetSize(Amat, &MM, &NN)); 7774 PetscCall(MatGetBlockSize(Amat, &bs)); 7775 nloc = (Iend - Istart) / bs; 7776 7777 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7778 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7779 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7780 7781 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7782 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7783 implementation */ 7784 if (bs > 1) { 7785 PetscCall(MatGetType(Amat, &jtype)); 7786 PetscCall(MatCreate(comm, &Gmat)); 7787 PetscCall(MatSetType(Gmat, jtype)); 7788 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7789 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7790 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7791 PetscInt *d_nnz, *o_nnz; 7792 MatScalar *aa, val, *AA; 7793 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7794 if (isseqaij) { 7795 a = Amat; 7796 b = NULL; 7797 } else { 7798 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7799 a = d->A; 7800 b = d->B; 7801 } 7802 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7803 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7804 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7805 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7806 const PetscInt *cols1, *cols2; 7807 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7808 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7809 nnz[brow / bs] = nc2 / bs; 7810 if (nc2 % bs) ok = 0; 7811 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7812 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7813 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7814 if (nc1 != nc2) ok = 0; 7815 else { 7816 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7817 if (cols1[jj] != cols2[jj]) ok = 0; 7818 if (cols1[jj] % bs != jj % bs) ok = 0; 7819 } 7820 } 7821 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7822 } 7823 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7824 if (!ok) { 7825 PetscCall(PetscFree2(d_nnz, o_nnz)); 7826 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7827 goto old_bs; 7828 } 7829 } 7830 } 7831 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7832 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7833 PetscCall(PetscFree2(d_nnz, o_nnz)); 7834 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7835 // diag 7836 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7837 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7838 ai = aseq->i; 7839 n = ai[brow + 1] - ai[brow]; 7840 aj = aseq->j + ai[brow]; 7841 for (int k = 0; k < n; k += bs) { // block columns 7842 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7843 val = 0; 7844 if (index_size == 0) { 7845 for (int ii = 0; ii < bs; ii++) { // rows in block 7846 aa = aseq->a + ai[brow + ii] + k; 7847 for (int jj = 0; jj < bs; jj++) { // columns in block 7848 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7849 } 7850 } 7851 } else { // use (index,index) value if provided 7852 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7853 int ii = index[iii]; 7854 aa = aseq->a + ai[brow + ii] + k; 7855 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7856 int jj = index[jjj]; 7857 val += PetscAbs(PetscRealPart(aa[jj])); 7858 } 7859 } 7860 } 7861 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7862 AA[k / bs] = val; 7863 } 7864 grow = Istart / bs + brow / bs; 7865 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7866 } 7867 // off-diag 7868 if (ismpiaij) { 7869 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7870 const PetscScalar *vals; 7871 const PetscInt *cols, *garray = aij->garray; 7872 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7873 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7874 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7875 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7876 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7877 AA[k / bs] = 0; 7878 AJ[cidx] = garray[cols[k]] / bs; 7879 } 7880 nc = ncols / bs; 7881 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7882 if (index_size == 0) { 7883 for (int ii = 0; ii < bs; ii++) { // rows in block 7884 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7885 for (int k = 0; k < ncols; k += bs) { 7886 for (int jj = 0; jj < bs; jj++) { // cols in block 7887 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7888 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7889 } 7890 } 7891 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7892 } 7893 } else { // use (index,index) value if provided 7894 for (int iii = 0; iii < index_size; iii++) { // rows in block 7895 int ii = index[iii]; 7896 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7897 for (int k = 0; k < ncols; k += bs) { 7898 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7899 int jj = index[jjj]; 7900 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7901 } 7902 } 7903 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7904 } 7905 } 7906 grow = Istart / bs + brow / bs; 7907 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7908 } 7909 } 7910 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7911 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7912 PetscCall(PetscFree2(AA, AJ)); 7913 } else { 7914 const PetscScalar *vals; 7915 const PetscInt *idx; 7916 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7917 old_bs: 7918 /* 7919 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7920 */ 7921 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7922 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7923 if (isseqaij) { 7924 PetscInt max_d_nnz; 7925 /* 7926 Determine exact preallocation count for (sequential) scalar matrix 7927 */ 7928 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7929 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7930 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7931 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7932 PetscCall(PetscFree3(w0, w1, w2)); 7933 } else if (ismpiaij) { 7934 Mat Daij, Oaij; 7935 const PetscInt *garray; 7936 PetscInt max_d_nnz; 7937 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7938 /* 7939 Determine exact preallocation count for diagonal block portion of scalar matrix 7940 */ 7941 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7942 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7943 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7944 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7945 PetscCall(PetscFree3(w0, w1, w2)); 7946 /* 7947 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7948 */ 7949 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7950 o_nnz[jj] = 0; 7951 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7952 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7953 o_nnz[jj] += ncols; 7954 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7955 } 7956 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7957 } 7958 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7959 /* get scalar copy (norms) of matrix */ 7960 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7961 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7962 PetscCall(PetscFree2(d_nnz, o_nnz)); 7963 for (Ii = Istart; Ii < Iend; Ii++) { 7964 PetscInt dest_row = Ii / bs; 7965 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7966 for (jj = 0; jj < ncols; jj++) { 7967 PetscInt dest_col = idx[jj] / bs; 7968 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7969 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7970 } 7971 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7972 } 7973 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7974 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7975 } 7976 } else { 7977 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7978 else { 7979 Gmat = Amat; 7980 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7981 } 7982 if (isseqaij) { 7983 a = Gmat; 7984 b = NULL; 7985 } else { 7986 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7987 a = d->A; 7988 b = d->B; 7989 } 7990 if (filter >= 0 || scale) { 7991 /* take absolute value of each entry */ 7992 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7993 MatInfo info; 7994 PetscScalar *avals; 7995 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7996 PetscCall(MatSeqAIJGetArray(c, &avals)); 7997 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 7998 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7999 } 8000 } 8001 } 8002 if (symmetrize) { 8003 PetscBool isset, issym; 8004 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8005 if (!isset || !issym) { 8006 Mat matTrans; 8007 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8008 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8009 PetscCall(MatDestroy(&matTrans)); 8010 } 8011 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8012 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8013 if (scale) { 8014 /* scale c for all diagonal values = 1 or -1 */ 8015 Vec diag; 8016 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8017 PetscCall(MatGetDiagonal(Gmat, diag)); 8018 PetscCall(VecReciprocal(diag)); 8019 PetscCall(VecSqrtAbs(diag)); 8020 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8021 PetscCall(VecDestroy(&diag)); 8022 } 8023 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8024 8025 if (filter >= 0) { 8026 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8027 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8028 } 8029 *a_Gmat = Gmat; 8030 PetscFunctionReturn(PETSC_SUCCESS); 8031 } 8032 8033 /* 8034 Special version for direct calls from Fortran 8035 */ 8036 #include <petsc/private/fortranimpl.h> 8037 8038 /* Change these macros so can be used in void function */ 8039 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8040 #undef PetscCall 8041 #define PetscCall(...) \ 8042 do { \ 8043 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8044 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8045 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8046 return; \ 8047 } \ 8048 } while (0) 8049 8050 #undef SETERRQ 8051 #define SETERRQ(comm, ierr, ...) 
\ 8052 do { \ 8053 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8054 return; \ 8055 } while (0) 8056 8057 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8058 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8059 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8060 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8061 #else 8062 #endif 8063 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8064 { 8065 Mat mat = *mmat; 8066 PetscInt m = *mm, n = *mn; 8067 InsertMode addv = *maddv; 8068 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8069 PetscScalar value; 8070 8071 MatCheckPreallocated(mat, 1); 8072 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8073 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8074 { 8075 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8076 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8077 PetscBool roworiented = aij->roworiented; 8078 8079 /* Some Variables required in the macro */ 8080 Mat A = aij->A; 8081 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8082 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8083 MatScalar *aa; 8084 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8085 Mat B = aij->B; 8086 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8087 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8088 MatScalar *ba; 8089 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8090 * cannot use "#if defined" inside a macro. 
*/ 8091 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8092 8093 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8094 PetscInt nonew = a->nonew; 8095 MatScalar *ap1, *ap2; 8096 8097 PetscFunctionBegin; 8098 PetscCall(MatSeqAIJGetArray(A, &aa)); 8099 PetscCall(MatSeqAIJGetArray(B, &ba)); 8100 for (i = 0; i < m; i++) { 8101 if (im[i] < 0) continue; 8102 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8103 if (im[i] >= rstart && im[i] < rend) { 8104 row = im[i] - rstart; 8105 lastcol1 = -1; 8106 rp1 = aj + ai[row]; 8107 ap1 = aa + ai[row]; 8108 rmax1 = aimax[row]; 8109 nrow1 = ailen[row]; 8110 low1 = 0; 8111 high1 = nrow1; 8112 lastcol2 = -1; 8113 rp2 = bj + bi[row]; 8114 ap2 = ba + bi[row]; 8115 rmax2 = bimax[row]; 8116 nrow2 = bilen[row]; 8117 low2 = 0; 8118 high2 = nrow2; 8119 8120 for (j = 0; j < n; j++) { 8121 if (roworiented) value = v[i * n + j]; 8122 else value = v[i + j * m]; 8123 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8124 if (in[j] >= cstart && in[j] < cend) { 8125 col = in[j] - cstart; 8126 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8127 } else if (in[j] < 0) continue; 8128 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8129 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8130 } else { 8131 if (mat->was_assembled) { 8132 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8133 #if defined(PETSC_USE_CTABLE) 8134 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8135 col--; 8136 #else 8137 col = aij->colmap[in[j]] - 1; 8138 #endif 8139 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8140 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8141 col = in[j]; 8142 /* Reinitialize the 
variables required by MatSetValues_SeqAIJ_B_Private() */ 8143 B = aij->B; 8144 b = (Mat_SeqAIJ *)B->data; 8145 bimax = b->imax; 8146 bi = b->i; 8147 bilen = b->ilen; 8148 bj = b->j; 8149 rp2 = bj + bi[row]; 8150 ap2 = ba + bi[row]; 8151 rmax2 = bimax[row]; 8152 nrow2 = bilen[row]; 8153 low2 = 0; 8154 high2 = nrow2; 8155 bm = aij->B->rmap->n; 8156 ba = b->a; 8157 inserted = PETSC_FALSE; 8158 } 8159 } else col = in[j]; 8160 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8161 } 8162 } 8163 } else if (!aij->donotstash) { 8164 if (roworiented) { 8165 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8166 } else { 8167 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8168 } 8169 } 8170 } 8171 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8172 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8173 } 8174 PetscFunctionReturnVoid(); 8175 } 8176 8177 /* Undefining these here since they were redefined from their original definition above! No 8178 * other PETSc functions should be defined past this point, as it is impossible to recover the 8179 * original definitions */ 8180 #undef PetscCall 8181 #undef SETERRQ 8182