1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 #if defined(PETSC_USE_LOG) 15 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 16 #endif 17 PetscCall(MatStashDestroy_Private(&mat->stash)); 18 PetscCall(VecDestroy(&aij->diag)); 19 PetscCall(MatDestroy(&aij->A)); 20 PetscCall(MatDestroy(&aij->B)); 21 #if defined(PETSC_USE_CTABLE) 22 PetscCall(PetscHMapIDestroy(&aij->colmap)); 23 #else 24 PetscCall(PetscFree(aij->colmap)); 25 #endif 26 PetscCall(PetscFree(aij->garray)); 27 PetscCall(VecDestroy(&aij->lvec)); 28 PetscCall(VecScatterDestroy(&aij->Mvctx)); 29 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 30 PetscCall(PetscFree(aij->ld)); 31 32 /* Free COO */ 33 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 34 35 PetscCall(PetscFree(mat->data)); 36 37 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 38 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 39 40 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 47 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 50 #if defined(PETSC_HAVE_CUDA) 51 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 52 #endif 53 #if defined(PETSC_HAVE_HIP) 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 55 #endif 56 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 57 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 58 #endif 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 60 #if defined(PETSC_HAVE_ELEMENTAL) 61 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 62 #endif 63 #if defined(PETSC_HAVE_SCALAPACK) 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 65 #endif 66 #if defined(PETSC_HAVE_HYPRE) 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 69 #endif 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 76 
#if defined(PETSC_HAVE_MKL_SPARSE) 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 78 #endif 79 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 80 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 82 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 84 PetscFunctionReturn(PETSC_SUCCESS); 85 } 86 87 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 88 #define TYPE AIJ 89 #define TYPE_AIJ 90 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 91 #undef TYPE 92 #undef TYPE_AIJ 93 94 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 95 { 96 Mat B; 97 98 PetscFunctionBegin; 99 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 100 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 101 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 102 PetscCall(MatDestroy(&B)); 103 PetscFunctionReturn(PETSC_SUCCESS); 104 } 105 106 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 107 { 108 Mat B; 109 110 PetscFunctionBegin; 111 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 112 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 113 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 /*MC 118 MATAIJ - MATAIJ 
= "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 149 150 Level: beginner 151 152 .seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 153 M*/ 154 155 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 156 { 157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 158 159 PetscFunctionBegin; 160 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 161 A->boundtocpu = flg; 162 #endif 163 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 164 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 165 166 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 167 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 168 * to differ from the parent matrix. */ 169 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 170 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 171 172 PetscFunctionReturn(PETSC_SUCCESS); 173 } 174 175 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 176 { 177 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 178 179 PetscFunctionBegin; 180 if (mat->A) { 181 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 182 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 183 } 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 188 { 189 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 191 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 192 const PetscInt *ia, *ib; 193 const MatScalar *aa, *bb, *aav, *bav; 194 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 195 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 196 197 PetscFunctionBegin; 198 *keptrows = NULL; 199 200 ia = a->i; 201 ib = b->i; 202 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 203 
PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 204 for (i = 0; i < m; i++) { 205 na = ia[i + 1] - ia[i]; 206 nb = ib[i + 1] - ib[i]; 207 if (!na && !nb) { 208 cnt++; 209 goto ok1; 210 } 211 aa = aav + ia[i]; 212 for (j = 0; j < na; j++) { 213 if (aa[j] != 0.0) goto ok1; 214 } 215 bb = bav + ib[i]; 216 for (j = 0; j < nb; j++) { 217 if (bb[j] != 0.0) goto ok1; 218 } 219 cnt++; 220 ok1:; 221 } 222 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 223 if (!n0rows) { 224 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 225 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 226 PetscFunctionReturn(PETSC_SUCCESS); 227 } 228 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 229 cnt = 0; 230 for (i = 0; i < m; i++) { 231 na = ia[i + 1] - ia[i]; 232 nb = ib[i + 1] - ib[i]; 233 if (!na && !nb) continue; 234 aa = aav + ia[i]; 235 for (j = 0; j < na; j++) { 236 if (aa[j] != 0.0) { 237 rows[cnt++] = rstart + i; 238 goto ok2; 239 } 240 } 241 bb = bav + ib[i]; 242 for (j = 0; j < nb; j++) { 243 if (bb[j] != 0.0) { 244 rows[cnt++] = rstart + i; 245 goto ok2; 246 } 247 } 248 ok2:; 249 } 250 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 251 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 252 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 253 PetscFunctionReturn(PETSC_SUCCESS); 254 } 255 256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 257 { 258 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 259 PetscBool cong; 260 261 PetscFunctionBegin; 262 PetscCall(MatHasCongruentLayouts(Y, &cong)); 263 if (Y->assembled && cong) { 264 PetscCall(MatDiagonalSet(aij->A, D, is)); 265 } else { 266 PetscCall(MatDiagonalSet_Default(Y, D, is)); 267 } 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 274 PetscInt i, rstart, nrows, *rows; 275 276 
PetscFunctionBegin; 277 *zrows = NULL; 278 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 279 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 280 for (i = 0; i < nrows; i++) rows[i] += rstart; 281 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 282 PetscFunctionReturn(PETSC_SUCCESS); 283 } 284 285 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 288 PetscInt i, m, n, *garray = aij->garray; 289 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 290 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 291 PetscReal *work; 292 const PetscScalar *dummy; 293 294 PetscFunctionBegin; 295 PetscCall(MatGetSize(A, &m, &n)); 296 PetscCall(PetscCalloc1(n, &work)); 297 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 298 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 299 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 300 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 301 if (type == NORM_2) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 304 } else if (type == NORM_1) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 307 } else if (type == NORM_INFINITY) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 310 } else if (type == REDUCTION_SUM_REALPART || 
type == REDUCTION_MEAN_REALPART) { 311 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 312 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 313 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 314 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 315 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 316 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 317 if (type == NORM_INFINITY) { 318 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 319 } else { 320 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 321 } 322 PetscCall(PetscFree(work)); 323 if (type == NORM_2) { 324 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 325 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 326 for (i = 0; i < n; i++) reductions[i] /= m; 327 } 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 332 { 333 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 334 IS sis, gis; 335 const PetscInt *isis, *igis; 336 PetscInt n, *iis, nsis, ngis, rstart, i; 337 338 PetscFunctionBegin; 339 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 340 PetscCall(MatFindNonzeroRows(a->B, &gis)); 341 PetscCall(ISGetSize(gis, &ngis)); 342 PetscCall(ISGetSize(sis, &nsis)); 343 PetscCall(ISGetIndices(sis, &isis)); 344 PetscCall(ISGetIndices(gis, &igis)); 345 346 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 347 PetscCall(PetscArraycpy(iis, igis, ngis)); 348 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 349 n = ngis + nsis; 350 
PetscCall(PetscSortRemoveDupsInt(&n, iis)); 351 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 352 for (i = 0; i < n; i++) iis[i] += rstart; 353 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 354 355 PetscCall(ISRestoreIndices(sis, &isis)); 356 PetscCall(ISRestoreIndices(gis, &igis)); 357 PetscCall(ISDestroy(&sis)); 358 PetscCall(ISDestroy(&gis)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to access. 368 */ 369 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 370 { 371 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 372 PetscInt n = aij->B->cmap->n, i; 373 374 PetscFunctionBegin; 375 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 376 #if defined(PETSC_USE_CTABLE) 377 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 378 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 379 #else 380 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 381 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 382 #endif 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 387 { \ 388 if (col <= lastcol1) low1 = 0; \ 389 else high1 = nrow1; \ 390 lastcol1 = col; \ 391 while (high1 - low1 > 5) { \ 392 t = (low1 + high1) / 2; \ 393 if (rp1[t] > col) high1 = t; \ 394 else low1 = t; \ 395 } \ 396 for (_i = low1; _i < high1; _i++) { \ 397 if (rp1[_i] > col) break; \ 398 if (rp1[_i] == col) { \ 399 if (addv == ADD_VALUES) { \ 400 ap1[_i] += value; \ 401 /* Not sure LogFlops will slow dow 
the code or not */ \ 402 (void)PetscLogFlops(1.0); \ 403 } else ap1[_i] = value; \ 404 goto a_noinsert; \ 405 } \ 406 } \ 407 if (value == 0.0 && ignorezeroentries && row != col) { \ 408 low1 = 0; \ 409 high1 = nrow1; \ 410 goto a_noinsert; \ 411 } \ 412 if (nonew == 1) { \ 413 low1 = 0; \ 414 high1 = nrow1; \ 415 goto a_noinsert; \ 416 } \ 417 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 418 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 419 N = nrow1++ - 1; \ 420 a->nz++; \ 421 high1++; \ 422 /* shift up all the later entries in this row */ \ 423 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 424 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 425 rp1[_i] = col; \ 426 ap1[_i] = value; \ 427 A->nonzerostate++; \ 428 a_noinsert:; \ 429 ailen[row] = nrow1; \ 430 } 431 432 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 433 { \ 434 if (col <= lastcol2) low2 = 0; \ 435 else high2 = nrow2; \ 436 lastcol2 = col; \ 437 while (high2 - low2 > 5) { \ 438 t = (low2 + high2) / 2; \ 439 if (rp2[t] > col) high2 = t; \ 440 else low2 = t; \ 441 } \ 442 for (_i = low2; _i < high2; _i++) { \ 443 if (rp2[_i] > col) break; \ 444 if (rp2[_i] == col) { \ 445 if (addv == ADD_VALUES) { \ 446 ap2[_i] += value; \ 447 (void)PetscLogFlops(1.0); \ 448 } else ap2[_i] = value; \ 449 goto b_noinsert; \ 450 } \ 451 } \ 452 if (value == 0.0 && ignorezeroentries) { \ 453 low2 = 0; \ 454 high2 = nrow2; \ 455 goto b_noinsert; \ 456 } \ 457 if (nonew == 1) { \ 458 low2 = 0; \ 459 high2 = nrow2; \ 460 goto b_noinsert; \ 461 } \ 462 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 463 MatSeqXAIJReallocateAIJ(B, bm, 1, 
nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 464 N = nrow2++ - 1; \ 465 b->nz++; \ 466 high2++; \ 467 /* shift up all the later entries in this row */ \ 468 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 469 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 470 rp2[_i] = col; \ 471 ap2[_i] = value; \ 472 B->nonzerostate++; \ 473 b_noinsert:; \ 474 bilen[row] = nrow2; \ 475 } 476 477 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 478 { 479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 480 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 481 PetscInt l, *garray = mat->garray, diag; 482 PetscScalar *aa, *ba; 483 484 PetscFunctionBegin; 485 /* code only works for square matrices A */ 486 487 /* find size of row to the left of the diagonal part */ 488 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 489 row = row - diag; 490 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 491 if (garray[b->j[b->i[row] + l]] > diag) break; 492 } 493 if (l) { 494 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 495 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 496 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 497 } 498 499 /* diagonal part */ 500 if (a->i[row + 1] - a->i[row]) { 501 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 502 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 503 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 504 } 505 506 /* right of diagonal part */ 507 if (b->i[row + 1] - b->i[row] - l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 PetscFunctionReturn(PETSC_SUCCESS); 513 } 514 515 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 516 { 517 
Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 518 PetscScalar value = 0.0; 519 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 520 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 521 PetscBool roworiented = aij->roworiented; 522 523 /* Some Variables required in the macro */ 524 Mat A = aij->A; 525 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 526 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 527 PetscBool ignorezeroentries = a->ignorezeroentries; 528 Mat B = aij->B; 529 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 530 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 531 MatScalar *aa, *ba; 532 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 533 PetscInt nonew; 534 MatScalar *ap1, *ap2; 535 536 PetscFunctionBegin; 537 PetscCall(MatSeqAIJGetArray(A, &aa)); 538 PetscCall(MatSeqAIJGetArray(B, &ba)); 539 for (i = 0; i < m; i++) { 540 if (im[i] < 0) continue; 541 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 542 if (im[i] >= rstart && im[i] < rend) { 543 row = im[i] - rstart; 544 lastcol1 = -1; 545 rp1 = aj + ai[row]; 546 ap1 = aa + ai[row]; 547 rmax1 = aimax[row]; 548 nrow1 = ailen[row]; 549 low1 = 0; 550 high1 = nrow1; 551 lastcol2 = -1; 552 rp2 = bj + bi[row]; 553 ap2 = ba + bi[row]; 554 rmax2 = bimax[row]; 555 nrow2 = bilen[row]; 556 low2 = 0; 557 high2 = nrow2; 558 559 for (j = 0; j < n; j++) { 560 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 561 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 562 if (in[j] >= cstart && in[j] < cend) { 563 col = in[j] - cstart; 564 nonew = a->nonew; 565 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 566 } else if (in[j] < 0) { 567 continue; 568 } else { 569 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 570 if (mat->was_assembled) { 571 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 572 #if defined(PETSC_USE_CTABLE) 573 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 574 col--; 575 #else 576 col = aij->colmap[in[j]] - 1; 577 #endif 578 if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 579 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 580 col = in[j]; 581 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 582 B = aij->B; 583 b = (Mat_SeqAIJ *)B->data; 584 bimax = b->imax; 585 bi = b->i; 586 bilen = b->ilen; 587 bj = b->j; 588 ba = b->a; 589 rp2 = bj + bi[row]; 590 ap2 = ba + bi[row]; 591 rmax2 = bimax[row]; 592 nrow2 = bilen[row]; 593 low2 = 0; 594 high2 = nrow2; 595 bm = aij->B->rmap->n; 596 ba = b->a; 597 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 598 if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) { 599 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 600 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 601 } 602 } else col = in[j]; 603 nonew = b->nonew; 604 
MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 605 } 606 } 607 } else { 608 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 609 if (!aij->donotstash) { 610 mat->assembled = PETSC_FALSE; 611 if (roworiented) { 612 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 613 } else { 614 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 615 } 616 } 617 } 618 } 619 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 620 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 621 PetscFunctionReturn(PETSC_SUCCESS); 622 } 623 624 /* 625 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 626 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 627 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
628 */ 629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 632 Mat A = aij->A; /* diagonal part of the matrix */ 633 Mat B = aij->B; /* offdiagonal part of the matrix */ 634 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 635 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 636 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 637 PetscInt *ailen = a->ilen, *aj = a->j; 638 PetscInt *bilen = b->ilen, *bj = b->j; 639 PetscInt am = aij->A->rmap->n, j; 640 PetscInt diag_so_far = 0, dnz; 641 PetscInt offd_so_far = 0, onz; 642 643 PetscFunctionBegin; 644 /* Iterate over all rows of the matrix */ 645 for (j = 0; j < am; j++) { 646 dnz = onz = 0; 647 /* Iterate over all non-zero columns of the current row */ 648 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 649 /* If column is in the diagonal */ 650 if (mat_j[col] >= cstart && mat_j[col] < cend) { 651 aj[diag_so_far++] = mat_j[col] - cstart; 652 dnz++; 653 } else { /* off-diagonal entries */ 654 bj[offd_so_far++] = mat_j[col]; 655 onz++; 656 } 657 } 658 ailen[j] = dnz; 659 bilen[j] = onz; 660 } 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 /* 665 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 666 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 667 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 668 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 669 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
670 */ 671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 672 { 673 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 674 Mat A = aij->A; /* diagonal part of the matrix */ 675 Mat B = aij->B; /* offdiagonal part of the matrix */ 676 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 677 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 678 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 679 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 680 PetscInt *ailen = a->ilen, *aj = a->j; 681 PetscInt *bilen = b->ilen, *bj = b->j; 682 PetscInt am = aij->A->rmap->n, j; 683 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 684 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 685 PetscScalar *aa = a->a, *ba = b->a; 686 687 PetscFunctionBegin; 688 /* Iterate over all rows of the matrix */ 689 for (j = 0; j < am; j++) { 690 dnz_row = onz_row = 0; 691 rowstart_offd = full_offd_i[j]; 692 rowstart_diag = full_diag_i[j]; 693 /* Iterate over all non-zero columns of the current row */ 694 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 695 /* If column is in the diagonal */ 696 if (mat_j[col] >= cstart && mat_j[col] < cend) { 697 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 698 aa[rowstart_diag + dnz_row] = mat_a[col]; 699 dnz_row++; 700 } else { /* off-diagonal entries */ 701 bj[rowstart_offd + onz_row] = mat_j[col]; 702 ba[rowstart_offd + onz_row] = mat_a[col]; 703 onz_row++; 704 } 705 } 706 ailen[j] = dnz_row; 707 bilen[j] = onz_row; 708 } 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 713 { 714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 715 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 716 PetscInt 
cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 717 718 PetscFunctionBegin; 719 for (i = 0; i < m; i++) { 720 if (idxm[i] < 0) continue; /* negative row */ 721 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 722 if (idxm[i] >= rstart && idxm[i] < rend) { 723 row = idxm[i] - rstart; 724 for (j = 0; j < n; j++) { 725 if (idxn[j] < 0) continue; /* negative column */ 726 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 727 if (idxn[j] >= cstart && idxn[j] < cend) { 728 col = idxn[j] - cstart; 729 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 730 } else { 731 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 732 #if defined(PETSC_USE_CTABLE) 733 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 734 col--; 735 #else 736 col = aij->colmap[idxn[j]] - 1; 737 #endif 738 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 739 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 740 } 741 } 742 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 743 } 744 PetscFunctionReturn(PETSC_SUCCESS); 745 } 746 747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 748 { 749 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 750 PetscInt nstash, reallocs; 751 752 PetscFunctionBegin; 753 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 754 755 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 756 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 757 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 758 PetscFunctionReturn(PETSC_SUCCESS); 759 } 760 761 
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 762 { 763 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 764 PetscMPIInt n; 765 PetscInt i, j, rstart, ncols, flg; 766 PetscInt *row, *col; 767 PetscBool other_disassembled; 768 PetscScalar *val; 769 770 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 771 772 PetscFunctionBegin; 773 if (!aij->donotstash && !mat->nooffprocentries) { 774 while (1) { 775 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 776 if (!flg) break; 777 778 for (i = 0; i < n;) { 779 /* Now identify the consecutive vals belonging to the same row */ 780 for (j = i, rstart = row[j]; j < n; j++) { 781 if (row[j] != rstart) break; 782 } 783 if (j < n) ncols = j - i; 784 else ncols = n - i; 785 /* Now assemble all these values with a single function call */ 786 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 787 i = j; 788 } 789 } 790 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 791 } 792 #if defined(PETSC_HAVE_DEVICE) 793 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 794 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 795 if (mat->boundtocpu) { 796 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 797 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 798 } 799 #endif 800 PetscCall(MatAssemblyBegin(aij->A, mode)); 801 PetscCall(MatAssemblyEnd(aij->A, mode)); 802 803 /* determine if any processor has disassembled, if so we must 804 also disassemble ourself, in order that we may reassemble. 
*/ 805 /* 806 if nonzero structure of submatrix B cannot change then we know that 807 no processor disassembled thus we can skip this stuff 808 */ 809 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 810 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 811 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 812 PetscCall(MatDisAssemble_MPIAIJ(mat)); 813 } 814 } 815 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 816 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 817 #if defined(PETSC_HAVE_DEVICE) 818 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 819 #endif 820 PetscCall(MatAssemblyBegin(aij->B, mode)); 821 PetscCall(MatAssemblyEnd(aij->B, mode)); 822 823 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 824 825 aij->rowvalues = NULL; 826 827 PetscCall(VecDestroy(&aij->diag)); 828 829 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 830 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 831 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 832 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 833 } 834 #if defined(PETSC_HAVE_DEVICE) 835 mat->offloadmask = PETSC_OFFLOAD_BOTH; 836 #endif 837 PetscFunctionReturn(PETSC_SUCCESS); 838 } 839 840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 841 { 842 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 843 844 PetscFunctionBegin; 845 PetscCall(MatZeroEntries(l->A)); 846 PetscCall(MatZeroEntries(l->B)); 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], 
PetscScalar diag, Vec x, Vec b) 851 { 852 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 853 PetscObjectState sA, sB; 854 PetscInt *lrows; 855 PetscInt r, len; 856 PetscBool cong, lch, gch; 857 858 PetscFunctionBegin; 859 /* get locally owned rows */ 860 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 861 PetscCall(MatHasCongruentLayouts(A, &cong)); 862 /* fix right hand side if needed */ 863 if (x && b) { 864 const PetscScalar *xx; 865 PetscScalar *bb; 866 867 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 868 PetscCall(VecGetArrayRead(x, &xx)); 869 PetscCall(VecGetArray(b, &bb)); 870 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 871 PetscCall(VecRestoreArrayRead(x, &xx)); 872 PetscCall(VecRestoreArray(b, &bb)); 873 } 874 875 sA = mat->A->nonzerostate; 876 sB = mat->B->nonzerostate; 877 878 if (diag != 0.0 && cong) { 879 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 880 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 881 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 882 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 883 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 884 PetscInt nnwA, nnwB; 885 PetscBool nnzA, nnzB; 886 887 nnwA = aijA->nonew; 888 nnwB = aijB->nonew; 889 nnzA = aijA->keepnonzeropattern; 890 nnzB = aijB->keepnonzeropattern; 891 if (!nnzA) { 892 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 893 aijA->nonew = 0; 894 } 895 if (!nnzB) { 896 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 897 aijB->nonew = 0; 898 } 899 /* Must zero here before the next loop */ 900 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 901 PetscCall(MatZeroRows(mat->B, len, lrows, 
0.0, NULL, NULL)); 902 for (r = 0; r < len; ++r) { 903 const PetscInt row = lrows[r] + A->rmap->rstart; 904 if (row >= A->cmap->N) continue; 905 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 906 } 907 aijA->nonew = nnwA; 908 aijB->nonew = nnwB; 909 } else { 910 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 911 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 912 } 913 PetscCall(PetscFree(lrows)); 914 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 915 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 916 917 /* reduce nonzerostate */ 918 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 919 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 920 if (gch) A->nonzerostate++; 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 925 { 926 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 927 PetscMPIInt n = A->rmap->n; 928 PetscInt i, j, r, m, len = 0; 929 PetscInt *lrows, *owners = A->rmap->range; 930 PetscMPIInt p = 0; 931 PetscSFNode *rrows; 932 PetscSF sf; 933 const PetscScalar *xx; 934 PetscScalar *bb, *mask, *aij_a; 935 Vec xmask, lmask; 936 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 937 const PetscInt *aj, *ii, *ridx; 938 PetscScalar *aa; 939 940 PetscFunctionBegin; 941 /* Create SF where leaves are input rows and roots are owned rows */ 942 PetscCall(PetscMalloc1(n, &lrows)); 943 for (r = 0; r < n; ++r) lrows[r] = -1; 944 PetscCall(PetscMalloc1(N, &rrows)); 945 for (r = 0; r < N; ++r) { 946 const PetscInt idx = rows[r]; 947 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 948 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 949 
PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 950 } 951 rrows[r].rank = p; 952 rrows[r].index = rows[r] - owners[p]; 953 } 954 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 955 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 956 /* Collect flags for rows to be zeroed */ 957 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 958 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 959 PetscCall(PetscSFDestroy(&sf)); 960 /* Compress and put in row numbers */ 961 for (r = 0; r < n; ++r) 962 if (lrows[r] >= 0) lrows[len++] = r; 963 /* zero diagonal part of matrix */ 964 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 965 /* handle off diagonal part of matrix */ 966 PetscCall(MatCreateVecs(A, &xmask, NULL)); 967 PetscCall(VecDuplicate(l->lvec, &lmask)); 968 PetscCall(VecGetArray(xmask, &bb)); 969 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 970 PetscCall(VecRestoreArray(xmask, &bb)); 971 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 972 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 973 PetscCall(VecDestroy(&xmask)); 974 if (x && b) { /* this code is buggy when the row and column layout don't match */ 975 PetscBool cong; 976 977 PetscCall(MatHasCongruentLayouts(A, &cong)); 978 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 979 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 980 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 981 PetscCall(VecGetArrayRead(l->lvec, &xx)); 982 PetscCall(VecGetArray(b, &bb)); 983 } 984 PetscCall(VecGetArray(lmask, &mask)); 985 /* remove zeroed rows of off diagonal matrix */ 986 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 987 ii = aij->i; 988 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - 
ii[lrows[i]])); 989 /* loop over all elements of off process part of matrix zeroing removed columns*/ 990 if (aij->compressedrow.use) { 991 m = aij->compressedrow.nrows; 992 ii = aij->compressedrow.i; 993 ridx = aij->compressedrow.rindex; 994 for (i = 0; i < m; i++) { 995 n = ii[i + 1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij_a + ii[i]; 998 999 for (j = 0; j < n; j++) { 1000 if (PetscAbsScalar(mask[*aj])) { 1001 if (b) bb[*ridx] -= *aa * xx[*aj]; 1002 *aa = 0.0; 1003 } 1004 aa++; 1005 aj++; 1006 } 1007 ridx++; 1008 } 1009 } else { /* do not use compressed row format */ 1010 m = l->B->rmap->n; 1011 for (i = 0; i < m; i++) { 1012 n = ii[i + 1] - ii[i]; 1013 aj = aij->j + ii[i]; 1014 aa = aij_a + ii[i]; 1015 for (j = 0; j < n; j++) { 1016 if (PetscAbsScalar(mask[*aj])) { 1017 if (b) bb[i] -= *aa * xx[*aj]; 1018 *aa = 0.0; 1019 } 1020 aa++; 1021 aj++; 1022 } 1023 } 1024 } 1025 if (x && b) { 1026 PetscCall(VecRestoreArray(b, &bb)); 1027 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1028 } 1029 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1030 PetscCall(VecRestoreArray(lmask, &mask)); 1031 PetscCall(VecDestroy(&lmask)); 1032 PetscCall(PetscFree(lrows)); 1033 1034 /* only change matrix nonzero state if pattern was allowed to be changed */ 1035 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1036 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1037 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1038 } 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } 1041 1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1045 PetscInt nt; 1046 VecScatter Mvctx = a->Mvctx; 1047 1048 PetscFunctionBegin; 1049 PetscCall(VecGetLocalSize(xx, &nt)); 1050 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1051 
PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1052 PetscUseTypeMethod(a->A, mult, xx, yy); 1053 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1054 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1064 PetscFunctionReturn(PETSC_SUCCESS); 1065 } 1066 1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1070 VecScatter Mvctx = a->Mvctx; 1071 1072 PetscFunctionBegin; 1073 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1074 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1075 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1076 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1077 PetscFunctionReturn(PETSC_SUCCESS); 1078 } 1079 1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1083 1084 PetscFunctionBegin; 1085 /* do nondiagonal part */ 1086 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1087 /* do local part */ 1088 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1089 /* add partial results together */ 1090 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1091 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1092 PetscFunctionReturn(PETSC_SUCCESS); 1093 } 1094 1095 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1096 { 1097 MPI_Comm comm; 1098 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1099 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1100 IS Me, Notme; 1101 PetscInt M, N, first, 
last, *notme, i; 1102 PetscBool lf; 1103 PetscMPIInt size; 1104 1105 PetscFunctionBegin; 1106 /* Easy test: symmetric diagonal block */ 1107 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1108 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1109 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1110 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1111 PetscCallMPI(MPI_Comm_size(comm, &size)); 1112 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1113 1114 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1115 PetscCall(MatGetSize(Amat, &M, &N)); 1116 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1117 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1118 for (i = 0; i < first; i++) notme[i] = i; 1119 for (i = last; i < M; i++) notme[i - last + first] = i; 1120 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1121 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1122 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1123 Aoff = Aoffs[0]; 1124 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1125 Boff = Boffs[0]; 1126 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1127 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1128 PetscCall(MatDestroyMatrices(1, &Boffs)); 1129 PetscCall(ISDestroy(&Me)); 1130 PetscCall(ISDestroy(&Notme)); 1131 PetscCall(PetscFree(notme)); 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1136 { 1137 PetscFunctionBegin; 1138 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1139 PetscFunctionReturn(PETSC_SUCCESS); 1140 } 1141 1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1145 1146 PetscFunctionBegin; 1147 /* do nondiagonal part */ 1148 PetscCall((*a->B->ops->multtranspose)(a->B, 
xx, a->lvec)); 1149 /* do local part */ 1150 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1151 /* add partial results together */ 1152 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1153 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1154 PetscFunctionReturn(PETSC_SUCCESS); 1155 } 1156 1157 /* 1158 This only works correctly for square matrices where the subblock A->A is the 1159 diagonal block 1160 */ 1161 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1162 { 1163 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1164 1165 PetscFunctionBegin; 1166 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1167 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1168 PetscCall(MatGetDiagonal(a->A, v)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1173 { 1174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1175 1176 PetscFunctionBegin; 1177 PetscCall(MatScale(a->A, aa)); 1178 PetscCall(MatScale(a->B, aa)); 1179 PetscFunctionReturn(PETSC_SUCCESS); 1180 } 1181 1182 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1183 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1184 { 1185 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1186 1187 PetscFunctionBegin; 1188 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1189 PetscCall(PetscFree(aij->Aperm1)); 1190 PetscCall(PetscFree(aij->Bperm1)); 1191 PetscCall(PetscFree(aij->Ajmap1)); 1192 PetscCall(PetscFree(aij->Bjmap1)); 1193 1194 PetscCall(PetscFree(aij->Aimap2)); 1195 PetscCall(PetscFree(aij->Bimap2)); 1196 PetscCall(PetscFree(aij->Aperm2)); 1197 PetscCall(PetscFree(aij->Bperm2)); 1198 PetscCall(PetscFree(aij->Ajmap2)); 1199 PetscCall(PetscFree(aij->Bjmap2)); 1200 
1201 PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf)); 1202 PetscCall(PetscFree(aij->Cperm1)); 1203 PetscFunctionReturn(PETSC_SUCCESS); 1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa, *ba; 1213 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1214 PetscInt64 nz, hnz; 1215 PetscInt *rowlens; 1216 PetscInt *colidxs; 1217 PetscScalar *matvals; 1218 PetscMPIInt rank; 1219 1220 PetscFunctionBegin; 1221 PetscCall(PetscViewerSetUp(viewer)); 1222 1223 M = mat->rmap->N; 1224 N = mat->cmap->N; 1225 m = mat->rmap->n; 1226 rs = mat->rmap->rstart; 1227 cs = mat->cmap->rstart; 1228 nz = A->nz + B->nz; 1229 1230 /* write matrix header */ 1231 header[0] = MAT_FILE_CLASSID; 1232 header[1] = M; 1233 header[2] = N; 1234 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1235 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1236 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1237 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1238 1239 /* fill in and store row lengths */ 1240 PetscCall(PetscMalloc1(m, &rowlens)); 1241 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1242 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1243 PetscCall(PetscFree(rowlens)); 1244 1245 /* fill in and store column indices */ 1246 PetscCall(PetscMalloc1(nz, &colidxs)); 1247 for (cnt = 0, i = 0; i < m; i++) { 1248 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1249 if (garray[B->j[jb]] > cs) break; 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1253 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 
1254 } 1255 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1256 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1257 PetscCall(PetscFree(colidxs)); 1258 1259 /* fill in and store nonzero values */ 1260 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1261 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1262 PetscCall(PetscMalloc1(nz, &matvals)); 1263 for (cnt = 0, i = 0; i < m; i++) { 1264 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1265 if (garray[B->j[jb]] > cs) break; 1266 matvals[cnt++] = ba[jb]; 1267 } 1268 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1269 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1270 } 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1272 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1273 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1274 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1275 PetscCall(PetscFree(matvals)); 1276 1277 /* write block size option to the viewer's .info file */ 1278 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1286 PetscMPIInt rank = aij->rank, size = aij->size; 1287 PetscBool isdraw, iascii, isbinary; 1288 PetscViewer sviewer; 1289 PetscViewerFormat format; 1290 1291 PetscFunctionBegin; 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1294 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1295 if 
(iascii) { 1296 PetscCall(PetscViewerGetFormat(viewer, &format)); 1297 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1298 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1299 PetscCall(PetscMalloc1(size, &nz)); 1300 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1301 for (i = 0; i < (PetscInt)size; i++) { 1302 nmax = PetscMax(nmax, nz[i]); 1303 nmin = PetscMin(nmin, nz[i]); 1304 navg += nz[i]; 1305 } 1306 PetscCall(PetscFree(nz)); 1307 navg = navg / size; 1308 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1309 PetscFunctionReturn(PETSC_SUCCESS); 1310 } 1311 PetscCall(PetscViewerGetFormat(viewer, &format)); 1312 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1313 MatInfo info; 1314 PetscInt *inodes = NULL; 1315 1316 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1317 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1318 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1319 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1320 if (!inodes) { 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1322 (double)info.memory)); 1323 } else { 1324 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1325 (double)info.memory)); 1326 } 1327 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1328 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", 
rank, (PetscInt)info.nz_used)); 1329 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1330 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1331 PetscCall(PetscViewerFlush(viewer)); 1332 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1333 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1334 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1337 PetscInt inodecount, inodelimit, *inodes; 1338 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1339 if (inodes) { 1340 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1341 } else { 1342 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1343 } 1344 PetscFunctionReturn(PETSC_SUCCESS); 1345 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1346 PetscFunctionReturn(PETSC_SUCCESS); 1347 } 1348 } else if (isbinary) { 1349 if (size == 1) { 1350 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1351 PetscCall(MatView(aij->A, viewer)); 1352 } else { 1353 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1354 } 1355 PetscFunctionReturn(PETSC_SUCCESS); 1356 } else if (iascii && size == 1) { 1357 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1358 PetscCall(MatView(aij->A, viewer)); 1359 PetscFunctionReturn(PETSC_SUCCESS); 1360 } else if (isdraw) { 1361 PetscDraw draw; 1362 PetscBool isnull; 1363 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1364 PetscCall(PetscDrawIsNull(draw, &isnull)); 1365 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1366 } 1367 1368 { /* assemble the entire matrix onto first processor */ 1369 Mat A = NULL, Av; 1370 IS 
isrow, iscol; 1371 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1373 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1374 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1375 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1376 /* The commented code uses MatCreateSubMatrices instead */ 1377 /* 1378 Mat *AA, A = NULL, Av; 1379 IS isrow,iscol; 1380 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1382 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1383 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1384 if (rank == 0) { 1385 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1386 A = AA[0]; 1387 Av = AA[0]; 1388 } 1389 PetscCall(MatDestroySubMatrices(1,&AA)); 1390 */ 1391 PetscCall(ISDestroy(&iscol)); 1392 PetscCall(ISDestroy(&isrow)); 1393 /* 1394 Everyone has to call to draw the matrix since the graphics waits are 1395 synchronized across all processors that share the PetscDraw object 1396 */ 1397 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1398 if (rank == 0) { 1399 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1400 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1401 } 1402 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1403 PetscCall(PetscViewerFlush(viewer)); 1404 PetscCall(MatDestroy(&A)); 1405 } 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1410 { 1411 PetscBool iascii, isdraw, issocket, isbinary; 1412 1413 PetscFunctionBegin; 1414 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, 
&isdraw)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1418 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1419 PetscFunctionReturn(PETSC_SUCCESS); 1420 } 1421 1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1423 { 1424 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1425 Vec bb1 = NULL; 1426 PetscBool hasop; 1427 1428 PetscFunctionBegin; 1429 if (flag == SOR_APPLY_UPPER) { 1430 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1431 PetscFunctionReturn(PETSC_SUCCESS); 1432 } 1433 1434 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1435 1436 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1437 if (flag & SOR_ZERO_INITIAL_GUESS) { 1438 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1439 its--; 1440 } 1441 1442 while (its--) { 1443 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1444 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1445 1446 /* update rhs: bb1 = bb - B*x */ 1447 PetscCall(VecScale(mat->lvec, -1.0)); 1448 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1449 1450 /* local sweep */ 1451 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1452 } 1453 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1454 if (flag & SOR_ZERO_INITIAL_GUESS) { 1455 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1456 its--; 1457 } 1458 while (its--) { 1459 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1460 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, 
INSERT_VALUES, SCATTER_FORWARD)); 1461 1462 /* update rhs: bb1 = bb - B*x */ 1463 PetscCall(VecScale(mat->lvec, -1.0)); 1464 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1465 1466 /* local sweep */ 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1468 } 1469 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1470 if (flag & SOR_ZERO_INITIAL_GUESS) { 1471 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1472 its--; 1473 } 1474 while (its--) { 1475 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1476 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1477 1478 /* update rhs: bb1 = bb - B*x */ 1479 PetscCall(VecScale(mat->lvec, -1.0)); 1480 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1481 1482 /* local sweep */ 1483 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1484 } 1485 } else if (flag & SOR_EISENSTAT) { 1486 Vec xx1; 1487 1488 PetscCall(VecDuplicate(bb, &xx1)); 1489 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1490 1491 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1492 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1493 if (!mat->diag) { 1494 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1495 PetscCall(MatGetDiagonal(matin, mat->diag)); 1496 } 1497 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1498 if (hasop) { 1499 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1500 } else { 1501 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1502 } 1503 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1504 1505 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1506 1507 /* local sweep */ 1508 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, 
(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1509 PetscCall(VecAXPY(xx, 1.0, xx1)); 1510 PetscCall(VecDestroy(&xx1)); 1511 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1512 1513 PetscCall(VecDestroy(&bb1)); 1514 1515 matin->factorerrortype = mat->A->factorerrortype; 1516 PetscFunctionReturn(PETSC_SUCCESS); 1517 } 1518 1519 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1520 { 1521 Mat aA, aB, Aperm; 1522 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1523 PetscScalar *aa, *ba; 1524 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1525 PetscSF rowsf, sf; 1526 IS parcolp = NULL; 1527 PetscBool done; 1528 1529 PetscFunctionBegin; 1530 PetscCall(MatGetLocalSize(A, &m, &n)); 1531 PetscCall(ISGetIndices(rowp, &rwant)); 1532 PetscCall(ISGetIndices(colp, &cwant)); 1533 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1534 1535 /* Invert row permutation to find out where my rows should go */ 1536 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1537 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1538 PetscCall(PetscSFSetFromOptions(rowsf)); 1539 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1540 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1541 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1542 1543 /* Invert column permutation to find out where my columns should go */ 1544 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1545 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1546 PetscCall(PetscSFSetFromOptions(sf)); 1547 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1548 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1549 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, 
work, cdest, MPI_REPLACE)); 1550 PetscCall(PetscSFDestroy(&sf)); 1551 1552 PetscCall(ISRestoreIndices(rowp, &rwant)); 1553 PetscCall(ISRestoreIndices(colp, &cwant)); 1554 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1555 1556 /* Find out where my gcols should go */ 1557 PetscCall(MatGetSize(aB, NULL, &ng)); 1558 PetscCall(PetscMalloc1(ng, &gcdest)); 1559 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1560 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1561 PetscCall(PetscSFSetFromOptions(sf)); 1562 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1563 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1564 PetscCall(PetscSFDestroy(&sf)); 1565 1566 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1567 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1568 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1569 for (i = 0; i < m; i++) { 1570 PetscInt row = rdest[i]; 1571 PetscMPIInt rowner; 1572 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1573 for (j = ai[i]; j < ai[i + 1]; j++) { 1574 PetscInt col = cdest[aj[j]]; 1575 PetscMPIInt cowner; 1576 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1577 if (rowner == cowner) dnnz[i]++; 1578 else onnz[i]++; 1579 } 1580 for (j = bi[i]; j < bi[i + 1]; j++) { 1581 PetscInt col = gcdest[bj[j]]; 1582 PetscMPIInt cowner; 1583 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1584 if (rowner == cowner) dnnz[i]++; 1585 else onnz[i]++; 1586 } 1587 } 1588 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1589 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1590 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1591 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1592 
PetscCall(PetscSFDestroy(&rowsf)); 1593 1594 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1595 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1596 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1597 for (i = 0; i < m; i++) { 1598 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1599 PetscInt j0, rowlen; 1600 rowlen = ai[i + 1] - ai[i]; 1601 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1602 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1603 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1604 } 1605 rowlen = bi[i + 1] - bi[i]; 1606 for (j0 = j = 0; j < rowlen; j0 = j) { 1607 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1608 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1609 } 1610 } 1611 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1612 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1613 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1614 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1615 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1616 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1617 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1618 PetscCall(PetscFree3(work, rdest, cdest)); 1619 PetscCall(PetscFree(gcdest)); 1620 if (parcolp) PetscCall(ISDestroy(&colp)); 1621 *B = Aperm; 1622 PetscFunctionReturn(PETSC_SUCCESS); 1623 } 1624 1625 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1626 { 1627 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1628 1629 PetscFunctionBegin; 1630 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1631 if (ghosts) *ghosts = aij->garray; 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 
/*
  MatGetInfo_MPIAIJ - Reports storage statistics for a parallel AIJ matrix.

  Input Parameters:
+ matin - the matrix
- flag  - MAT_LOCAL (this process only), MAT_GLOBAL_MAX or MAT_GLOBAL_SUM (reduced over the matrix communicator)

  Output Parameter:
. info - filled with nz_used, nz_allocated, nz_unneeded, memory and mallocs, each being the
         sum over the diagonal and off-diagonal blocks, optionally reduced across processes

  Notes:
  For any other value of flag the five counters are left as returned by the off-diagonal block.
  The factorization-related fields are always zeroed.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ           *aij  = (Mat_MPIAIJ *)matin->data;
  Mat                   diag = aij->A, offdiag = aij->B;
  PetscLogDouble        local[5], reduced[5];
  const PetscLogDouble *result = NULL;

  PetscFunctionBegin;
  info->block_size = 1.0;

  /* statistics of the diagonal block */
  PetscCall(MatGetInfo(diag, MAT_LOCAL, info));
  local[0] = info->nz_used;
  local[1] = info->nz_allocated;
  local[2] = info->nz_unneeded;
  local[3] = info->memory;
  local[4] = info->mallocs;

  /* add the statistics of the off-diagonal block */
  PetscCall(MatGetInfo(offdiag, MAT_LOCAL, info));
  local[0] += info->nz_used;
  local[1] += info->nz_allocated;
  local[2] += info->nz_unneeded;
  local[3] += info->memory;
  local[4] += info->mallocs;

  switch (flag) {
  case MAT_LOCAL:
    result = local;
    break;
  case MAT_GLOBAL_MAX:
    PetscCall(MPIU_Allreduce(local, reduced, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
    result = reduced;
    break;
  case MAT_GLOBAL_SUM:
    PetscCall(MPIU_Allreduce(local, reduced, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
    result = reduced;
    break;
  default:
    break;
  }
  if (result) {
    info->nz_used      = result[0];
    info->nz_allocated = result[1];
    info->nz_unneeded  = result[2];
    info->memory       = result[3];
    info->mallocs      = result[4];
  }

  /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 case MAT_FORM_EXPLICIT_TRANSPOSE: 1701 MatCheckPreallocated(A, 1); 1702 PetscCall(MatSetOption(a->A, op, flg)); 1703 PetscCall(MatSetOption(a->B, op, flg)); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 MatCheckPreallocated(A, 1); 1707 a->roworiented = flg; 1708 1709 PetscCall(MatSetOption(a->A, op, flg)); 1710 PetscCall(MatSetOption(a->B, op, flg)); 1711 break; 1712 case MAT_FORCE_DIAGONAL_ENTRIES: 1713 case MAT_SORTED_FULL: 1714 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1715 break; 1716 case MAT_IGNORE_OFF_PROC_ENTRIES: 1717 a->donotstash = flg; 1718 break; 1719 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1720 case MAT_SPD: 1721 case MAT_SYMMETRIC: 1722 case MAT_STRUCTURALLY_SYMMETRIC: 1723 case MAT_HERMITIAN: 1724 case MAT_SYMMETRY_ETERNAL: 1725 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1726 case MAT_SPD_ETERNAL: 1727 /* if the diagonal matrix is square it inherits some of the properties above */ 1728 break; 1729 case MAT_SUBMAT_SINGLEIS: 1730 A->submat_singleis = flg; 1731 break; 1732 case MAT_STRUCTURE_ONLY: 1733 /* The option is handled directly by MatSetOption() */ 1734 break; 1735 default: 1736 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1737 } 1738 PetscFunctionReturn(PETSC_SUCCESS); 1739 } 1740 1741 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1744 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1745 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1746 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1747 PetscInt *cmap, *idx_p; 1748 1749 PetscFunctionBegin; 1750 PetscCheck(!mat->getrowactive, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1751 mat->getrowactive = PETSC_TRUE; 1752 1753 if (!mat->rowvalues && (idx || v)) { 1754 /* 1755 allocate enough space to hold information from the longest row. 1756 */ 1757 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1758 PetscInt max = 1, tmp; 1759 for (i = 0; i < matin->rmap->n; i++) { 1760 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1761 if (max < tmp) max = tmp; 1762 } 1763 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1764 } 1765 1766 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1767 lrow = row - rstart; 1768 1769 pvA = &vworkA; 1770 pcA = &cworkA; 1771 pvB = &vworkB; 1772 pcB = &cworkB; 1773 if (!v) { 1774 pvA = NULL; 1775 pvB = NULL; 1776 } 1777 if (!idx) { 1778 pcA = NULL; 1779 if (!v) pcB = NULL; 1780 } 1781 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1783 nztot = nzA + nzB; 1784 1785 cmap = mat->garray; 1786 if (v || idx) { 1787 if (nztot) { 1788 /* Sort by increasing column numbers, assuming A and B already sorted */ 1789 PetscInt imark = -1; 1790 if (v) { 1791 *v = v_p = mat->rowvalues; 1792 for (i = 0; i < nzB; i++) { 1793 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1794 else break; 1795 } 1796 imark = i; 1797 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1798 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1799 } 1800 if (idx) { 1801 *idx = idx_p = mat->rowindices; 1802 if (imark > -1) { 1803 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1804 } else { 1805 for (i = 0; i < nzB; i++) { 1806 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1807 else break; 1808 } 1809 imark = i; 1810 } 1811 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1812 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1813 } 1814 } 
else { 1815 if (idx) *idx = NULL; 1816 if (v) *v = NULL; 1817 } 1818 } 1819 *nz = nztot; 1820 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1821 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1826 { 1827 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1828 1829 PetscFunctionBegin; 1830 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1831 aij->getrowactive = PETSC_FALSE; 1832 PetscFunctionReturn(PETSC_SUCCESS); 1833 } 1834 1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1838 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1839 PetscInt i, j, cstart = mat->cmap->rstart; 1840 PetscReal sum = 0.0; 1841 const MatScalar *v, *amata, *bmata; 1842 1843 PetscFunctionBegin; 1844 if (aij->size == 1) { 1845 PetscCall(MatNorm(aij->A, type, norm)); 1846 } else { 1847 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1848 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1849 if (type == NORM_FROBENIUS) { 1850 v = amata; 1851 for (i = 0; i < amat->nz; i++) { 1852 sum += PetscRealPart(PetscConj(*v) * (*v)); 1853 v++; 1854 } 1855 v = bmata; 1856 for (i = 0; i < bmat->nz; i++) { 1857 sum += PetscRealPart(PetscConj(*v) * (*v)); 1858 v++; 1859 } 1860 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1861 *norm = PetscSqrtReal(*norm); 1862 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1863 } else if (type == NORM_1) { /* max column norm */ 1864 PetscReal *tmp, *tmp2; 1865 PetscInt *jj, *garray = aij->garray; 1866 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1867 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1868 *norm = 0.0; 1869 v = amata; 
1870 jj = amat->j; 1871 for (j = 0; j < amat->nz; j++) { 1872 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1873 v++; 1874 } 1875 v = bmata; 1876 jj = bmat->j; 1877 for (j = 0; j < bmat->nz; j++) { 1878 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1879 v++; 1880 } 1881 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1882 for (j = 0; j < mat->cmap->N; j++) { 1883 if (tmp2[j] > *norm) *norm = tmp2[j]; 1884 } 1885 PetscCall(PetscFree(tmp)); 1886 PetscCall(PetscFree(tmp2)); 1887 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1888 } else if (type == NORM_INFINITY) { /* max row norm */ 1889 PetscReal ntemp = 0.0; 1890 for (j = 0; j < aij->A->rmap->n; j++) { 1891 v = amata + amat->i[j]; 1892 sum = 0.0; 1893 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1894 sum += PetscAbsScalar(*v); 1895 v++; 1896 } 1897 v = bmata + bmat->i[j]; 1898 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1899 sum += PetscAbsScalar(*v); 1900 v++; 1901 } 1902 if (sum > ntemp) ntemp = sum; 1903 } 1904 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1905 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1906 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1907 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1908 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1909 } 1910 PetscFunctionReturn(PETSC_SUCCESS); 1911 } 1912 1913 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1914 { 1915 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1916 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1917 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1918 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1919 Mat B, A_diag, *B_diag; 1920 const MatScalar *pbv, *bv; 1921 1922 PetscFunctionBegin; 
1923 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1924 ma = A->rmap->n; 1925 na = A->cmap->n; 1926 mb = a->B->rmap->n; 1927 nb = a->B->cmap->n; 1928 ai = Aloc->i; 1929 aj = Aloc->j; 1930 bi = Bloc->i; 1931 bj = Bloc->j; 1932 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1933 PetscInt *d_nnz, *g_nnz, *o_nnz; 1934 PetscSFNode *oloc; 1935 PETSC_UNUSED PetscSF sf; 1936 1937 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1938 /* compute d_nnz for preallocation */ 1939 PetscCall(PetscArrayzero(d_nnz, na)); 1940 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1941 /* compute local off-diagonal contributions */ 1942 PetscCall(PetscArrayzero(g_nnz, nb)); 1943 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1944 /* map those to global */ 1945 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1946 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1947 PetscCall(PetscSFSetFromOptions(sf)); 1948 PetscCall(PetscArrayzero(o_nnz, na)); 1949 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1950 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1951 PetscCall(PetscSFDestroy(&sf)); 1952 1953 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1954 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1955 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1956 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1957 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1958 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1959 } else { 1960 B = *matout; 1961 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1962 } 1963 1964 b = (Mat_MPIAIJ *)B->data; 1965 A_diag = a->A; 1966 B_diag = &b->A; 1967 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1968 A_diag_ncol = A_diag->cmap->N; 1969 B_diag_ilen = sub_B_diag->ilen; 1970 B_diag_i = sub_B_diag->i; 1971 1972 /* Set 
ilen for diagonal of B */ 1973 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1974 1975 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1976 very quickly (=without using MatSetValues), because all writes are local. */ 1977 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1978 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1979 1980 /* copy over the B part */ 1981 PetscCall(PetscMalloc1(bi[mb], &cols)); 1982 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1983 pbv = bv; 1984 row = A->rmap->rstart; 1985 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1986 cols_tmp = cols; 1987 for (i = 0; i < mb; i++) { 1988 ncol = bi[i + 1] - bi[i]; 1989 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1990 row++; 1991 pbv += ncol; 1992 cols_tmp += ncol; 1993 } 1994 PetscCall(PetscFree(cols)); 1995 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1996 1997 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1998 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1999 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2000 *matout = B; 2001 } else { 2002 PetscCall(MatHeaderMerge(A, &B)); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 2008 { 2009 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2010 Mat a = aij->A, b = aij->B; 2011 PetscInt s1, s2, s3; 2012 2013 PetscFunctionBegin; 2014 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 2015 if (rr) { 2016 PetscCall(VecGetLocalSize(rr, &s1)); 2017 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2018 /* Overlap communication with computation. 
*/ 2019 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2020 } 2021 if (ll) { 2022 PetscCall(VecGetLocalSize(ll, &s1)); 2023 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2024 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2025 } 2026 /* scale the diagonal block */ 2027 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2028 2029 if (rr) { 2030 /* Do a scatter end and then right scale the off-diagonal block */ 2031 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2032 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2033 } 2034 PetscFunctionReturn(PETSC_SUCCESS); 2035 } 2036 2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2038 { 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2040 2041 PetscFunctionBegin; 2042 PetscCall(MatSetUnfactored(a->A)); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2047 { 2048 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2049 Mat a, b, c, d; 2050 PetscBool flg; 2051 2052 PetscFunctionBegin; 2053 a = matA->A; 2054 b = matA->B; 2055 c = matB->A; 2056 d = matB->B; 2057 2058 PetscCall(MatEqual(a, c, &flg)); 2059 if (flg) PetscCall(MatEqual(b, d, &flg)); 2060 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2065 { 2066 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2067 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2068 2069 PetscFunctionBegin; 2070 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2071 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2072 /* because of the column compression in the off-processor part of the matrix a->B, 2073 the number of columns in a->B and b->B may be different, hence we cannot call 2074 the MatCopy() directly on the two parts. If need be, we can provide a more 2075 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2076 then copying the submatrices */ 2077 PetscCall(MatCopy_Basic(A, B, str)); 2078 } else { 2079 PetscCall(MatCopy(a->A, b->A, str)); 2080 PetscCall(MatCopy(a->B, b->B, str)); 2081 } 2082 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 /* 2087 Computes the number of nonzeros per row needed for preallocation when X and Y 2088 have different nonzero structure. 2089 */ 2090 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2091 { 2092 PetscInt i, j, k, nzx, nzy; 2093 2094 PetscFunctionBegin; 2095 /* Set the number of nonzeros in the new matrix */ 2096 for (i = 0; i < m; i++) { 2097 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2098 nzx = xi[i + 1] - xi[i]; 2099 nzy = yi[i + 1] - yi[i]; 2100 nnz[i] = 0; 2101 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2102 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2103 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2104 nnz[i]++; 2105 } 2106 for (; k < nzy; k++) nnz[i]++; 2107 } 2108 PetscFunctionReturn(PETSC_SUCCESS); 2109 } 2110 2111 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2112 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2113 { 2114 PetscInt m = Y->rmap->N; 2115 Mat_SeqAIJ *x = 
(Mat_SeqAIJ *)X->data; 2116 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2117 2118 PetscFunctionBegin; 2119 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 2123 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2124 { 2125 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2126 2127 PetscFunctionBegin; 2128 if (str == SAME_NONZERO_PATTERN) { 2129 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2130 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2131 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2132 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2133 } else { 2134 Mat B; 2135 PetscInt *nnz_d, *nnz_o; 2136 2137 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2138 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2139 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2140 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2141 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2142 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2143 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2144 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2145 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2146 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2147 PetscCall(MatHeaderMerge(Y, &B)); 2148 PetscCall(PetscFree(nnz_d)); 2149 PetscCall(PetscFree(nnz_o)); 2150 } 2151 PetscFunctionReturn(PETSC_SUCCESS); 2152 } 2153 2154 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2155 2156 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2157 { 2158 PetscFunctionBegin; 2159 if (PetscDefined(USE_COMPLEX)) { 2160 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2161 2162 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2163 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2164 } 2165 PetscFunctionReturn(PETSC_SUCCESS); 2166 } 2167 2168 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 2169 { 2170 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2171 2172 PetscFunctionBegin; 2173 PetscCall(MatRealPart(a->A)); 2174 PetscCall(MatRealPart(a->B)); 2175 PetscFunctionReturn(PETSC_SUCCESS); 2176 } 2177 2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2179 { 2180 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2181 2182 PetscFunctionBegin; 2183 PetscCall(MatImaginaryPart(a->A)); 2184 PetscCall(MatImaginaryPart(a->B)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2189 { 2190 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2191 PetscInt i, *idxb = NULL, m = A->rmap->n; 2192 PetscScalar *va, *vv; 2193 Vec vB, vA; 2194 const PetscScalar *vb; 2195 2196 PetscFunctionBegin; 2197 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2198 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2199 2200 PetscCall(VecGetArrayWrite(vA, &va)); 2201 if (idx) { 2202 for (i = 0; i < m; i++) { 2203 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2204 } 2205 } 2206 2207 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2208 PetscCall(PetscMalloc1(m, &idxb)); 2209 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2210 2211 PetscCall(VecGetArrayWrite(v, &vv)); 2212 PetscCall(VecGetArrayRead(vB, &vb)); 2213 for (i = 0; i < m; i++) { 2214 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2215 vv[i] = vb[i]; 2216 if (idx) idx[i] = a->garray[idxb[i]]; 2217 } else { 2218 vv[i] = va[i]; 2219 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2220 } 2221 } 2222 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2223 PetscCall(VecRestoreArrayWrite(vA, &va)); 2224 PetscCall(VecRestoreArrayRead(vB, &vb)); 2225 PetscCall(PetscFree(idxb)); 2226 PetscCall(VecDestroy(&vA)); 2227 PetscCall(VecDestroy(&vB)); 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2232 { 2233 
Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2234 PetscInt m = A->rmap->n, n = A->cmap->n; 2235 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2236 PetscInt *cmap = mat->garray; 2237 PetscInt *diagIdx, *offdiagIdx; 2238 Vec diagV, offdiagV; 2239 PetscScalar *a, *diagA, *offdiagA; 2240 const PetscScalar *ba, *bav; 2241 PetscInt r, j, col, ncols, *bi, *bj; 2242 Mat B = mat->B; 2243 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2244 2245 PetscFunctionBegin; 2246 /* When a process holds entire A and other processes have no entry */ 2247 if (A->cmap->N == n) { 2248 PetscCall(VecGetArrayWrite(v, &diagA)); 2249 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2250 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2251 PetscCall(VecDestroy(&diagV)); 2252 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2253 PetscFunctionReturn(PETSC_SUCCESS); 2254 } else if (n == 0) { 2255 if (m) { 2256 PetscCall(VecGetArrayWrite(v, &a)); 2257 for (r = 0; r < m; r++) { 2258 a[r] = 0.0; 2259 if (idx) idx[r] = -1; 2260 } 2261 PetscCall(VecRestoreArrayWrite(v, &a)); 2262 } 2263 PetscFunctionReturn(PETSC_SUCCESS); 2264 } 2265 2266 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2267 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2268 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2269 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2270 2271 /* Get offdiagIdx[] for implicit 0.0 */ 2272 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2273 ba = bav; 2274 bi = b->i; 2275 bj = b->j; 2276 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2277 for (r = 0; r < m; r++) { 2278 ncols = bi[r + 1] - bi[r]; 2279 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2280 offdiagA[r] = *ba; 2281 offdiagIdx[r] = cmap[0]; 2282 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2283 offdiagA[r] = 0.0; 2284 2285 /* Find first hole in the cmap */ 2286 for (j = 0; j < ncols; j++) { 2287 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2288 if 
/*
  MatGetRowMin_MPIAIJ - For every local row, finds the entry with the smallest real part,
  treating columns without a stored entry as 0.0.

  Input Parameter:
. A - the matrix

  Output Parameters:
+ v   - receives the per-row minimum
- idx - (optional, may be NULL) receives the global column of the minimum

  Notes:
  The diagonal block is handled by MatGetRowMin() on mat->A; the off-diagonal block is scanned
  here. When both blocks give the same real part the smaller global column is reported.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap the storage of v in a sequential vector so the diagonal block writes its result straight into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this process owns no columns: report PETSC_MAX_REAL and column -1 for every local row */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the loop index j is used as the candidate global column and compared against cstart; confirm this matches the intended numbering of off-diagonal columns */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          /* NOTE(review): cmap is indexed by ncols - 1 directly rather than through bj[]; verify this is intended */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row; ba and bj advance across rows and are not reset */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2469 PetscInt *diagIdx, *offdiagIdx; 2470 Vec diagV, offdiagV; 2471 PetscScalar *a, *diagA, *offdiagA; 2472 const PetscScalar *ba, *bav; 2473 PetscInt r, j, col, ncols, *bi, *bj; 2474 Mat B = mat->B; 2475 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2476 2477 PetscFunctionBegin; 2478 /* When a process holds entire A and other processes have no entry */ 2479 if (A->cmap->N == n) { 2480 PetscCall(VecGetArrayWrite(v, &diagA)); 2481 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2482 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2483 PetscCall(VecDestroy(&diagV)); 2484 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2485 PetscFunctionReturn(PETSC_SUCCESS); 2486 } else if (n == 0) { 2487 if (m) { 2488 PetscCall(VecGetArrayWrite(v, &a)); 2489 for (r = 0; r < m; r++) { 2490 a[r] = PETSC_MIN_REAL; 2491 if (idx) idx[r] = -1; 2492 } 2493 PetscCall(VecRestoreArrayWrite(v, &a)); 2494 } 2495 PetscFunctionReturn(PETSC_SUCCESS); 2496 } 2497 2498 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2499 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2500 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2501 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2502 2503 /* Get offdiagIdx[] for implicit 0.0 */ 2504 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2505 ba = bav; 2506 bi = b->i; 2507 bj = b->j; 2508 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2509 for (r = 0; r < m; r++) { 2510 ncols = bi[r + 1] - bi[r]; 2511 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2512 offdiagA[r] = *ba; 2513 offdiagIdx[r] = cmap[0]; 2514 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2515 offdiagA[r] = 0.0; 2516 2517 /* Find first hole in the cmap */ 2518 for (j = 0; j < ncols; j++) { 2519 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2520 if (col > j && j < cstart) { 2521 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2522 break; 2523 } else if (col > j + n && j >= cstart) { 2524 offdiagIdx[r] = 
j + n; /* global column number of first implicit 0.0 */ 2525 break; 2526 } 2527 } 2528 if (j == ncols && ncols < A->cmap->N - n) { 2529 /* a hole is outside compressed Bcols */ 2530 if (ncols == 0) { 2531 if (cstart) { 2532 offdiagIdx[r] = 0; 2533 } else offdiagIdx[r] = cend; 2534 } else { /* ncols > 0 */ 2535 offdiagIdx[r] = cmap[ncols - 1] + 1; 2536 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2537 } 2538 } 2539 } 2540 2541 for (j = 0; j < ncols; j++) { 2542 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2543 offdiagA[r] = *ba; 2544 offdiagIdx[r] = cmap[*bj]; 2545 } 2546 ba++; 2547 bj++; 2548 } 2549 } 2550 2551 PetscCall(VecGetArrayWrite(v, &a)); 2552 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2553 for (r = 0; r < m; ++r) { 2554 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2555 a[r] = diagA[r]; 2556 if (idx) idx[r] = cstart + diagIdx[r]; 2557 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2558 a[r] = diagA[r]; 2559 if (idx) { 2560 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2561 idx[r] = cstart + diagIdx[r]; 2562 } else idx[r] = offdiagIdx[r]; 2563 } 2564 } else { 2565 a[r] = offdiagA[r]; 2566 if (idx) idx[r] = offdiagIdx[r]; 2567 } 2568 } 2569 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2570 PetscCall(VecRestoreArrayWrite(v, &a)); 2571 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2572 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2573 PetscCall(VecDestroy(&diagV)); 2574 PetscCall(VecDestroy(&offdiagV)); 2575 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2576 PetscFunctionReturn(PETSC_SUCCESS); 2577 } 2578 2579 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2580 { 2581 Mat *dummy; 2582 2583 PetscFunctionBegin; 2584 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2585 *newmat = *dummy; 2586 PetscCall(PetscFree(dummy)); 2587 PetscFunctionReturn(PETSC_SUCCESS); 2588 } 2589 2590 PetscErrorCode 
MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2591 { 2592 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2593 2594 PetscFunctionBegin; 2595 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2596 A->factorerrortype = a->A->factorerrortype; 2597 PetscFunctionReturn(PETSC_SUCCESS); 2598 } 2599 2600 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2601 { 2602 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2603 2604 PetscFunctionBegin; 2605 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2606 PetscCall(MatSetRandom(aij->A, rctx)); 2607 if (x->assembled) { 2608 PetscCall(MatSetRandom(aij->B, rctx)); 2609 } else { 2610 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2611 } 2612 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2613 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2614 PetscFunctionReturn(PETSC_SUCCESS); 2615 } 2616 2617 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2618 { 2619 PetscFunctionBegin; 2620 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2621 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2622 PetscFunctionReturn(PETSC_SUCCESS); 2623 } 2624 2625 /*@ 2626 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2627 2628 Not Collective 2629 2630 Input Parameter: 2631 . A - the matrix 2632 2633 Output Parameter: 2634 . 
nz - the number of nonzeros 2635 2636 Level: advanced 2637 2638 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `Mat` 2639 @*/ 2640 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2641 { 2642 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2643 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2644 PetscBool isaij; 2645 2646 PetscFunctionBegin; 2647 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2648 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2649 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 /*@ 2654 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2655 2656 Collective 2657 2658 Input Parameters: 2659 + A - the matrix 2660 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2661 2662 Level: advanced 2663 2664 .seealso: [](chapter_matrices), `Mat`, `Mat`, `MATMPIAIJ` 2665 @*/ 2666 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2667 { 2668 PetscFunctionBegin; 2669 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2674 { 2675 PetscBool sc = PETSC_FALSE, flg; 2676 2677 PetscFunctionBegin; 2678 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2679 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2680 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2681 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2682 PetscOptionsHeadEnd(); 2683 PetscFunctionReturn(PETSC_SUCCESS); 2684 } 
2685 2686 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2687 { 2688 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2689 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2690 2691 PetscFunctionBegin; 2692 if (!Y->preallocated) { 2693 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2694 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2695 PetscInt nonew = aij->nonew; 2696 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2697 aij->nonew = nonew; 2698 } 2699 PetscCall(MatShift_Basic(Y, a)); 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2704 { 2705 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2706 2707 PetscFunctionBegin; 2708 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2709 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2710 if (d) { 2711 PetscInt rstart; 2712 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2713 *d += rstart; 2714 } 2715 PetscFunctionReturn(PETSC_SUCCESS); 2716 } 2717 2718 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2719 { 2720 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2721 2722 PetscFunctionBegin; 2723 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2724 PetscFunctionReturn(PETSC_SUCCESS); 2725 } 2726 2727 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2728 { 2729 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2730 2731 PetscFunctionBegin; 2732 PetscCall(MatEliminateZeros(a->A)); 2733 PetscCall(MatEliminateZeros(a->B)); 2734 PetscFunctionReturn(PETSC_SUCCESS); 2735 } 2736 2737 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2738 MatGetRow_MPIAIJ, 2739 MatRestoreRow_MPIAIJ, 2740 MatMult_MPIAIJ, 2741 /* 4*/ MatMultAdd_MPIAIJ, 2742 MatMultTranspose_MPIAIJ, 2743 MatMultTransposeAdd_MPIAIJ, 2744 NULL, 
2745 NULL, 2746 NULL, 2747 /*10*/ NULL, 2748 NULL, 2749 NULL, 2750 MatSOR_MPIAIJ, 2751 MatTranspose_MPIAIJ, 2752 /*15*/ MatGetInfo_MPIAIJ, 2753 MatEqual_MPIAIJ, 2754 MatGetDiagonal_MPIAIJ, 2755 MatDiagonalScale_MPIAIJ, 2756 MatNorm_MPIAIJ, 2757 /*20*/ MatAssemblyBegin_MPIAIJ, 2758 MatAssemblyEnd_MPIAIJ, 2759 MatSetOption_MPIAIJ, 2760 MatZeroEntries_MPIAIJ, 2761 /*24*/ MatZeroRows_MPIAIJ, 2762 NULL, 2763 NULL, 2764 NULL, 2765 NULL, 2766 /*29*/ MatSetUp_MPI_Hash, 2767 NULL, 2768 NULL, 2769 MatGetDiagonalBlock_MPIAIJ, 2770 NULL, 2771 /*34*/ MatDuplicate_MPIAIJ, 2772 NULL, 2773 NULL, 2774 NULL, 2775 NULL, 2776 /*39*/ MatAXPY_MPIAIJ, 2777 MatCreateSubMatrices_MPIAIJ, 2778 MatIncreaseOverlap_MPIAIJ, 2779 MatGetValues_MPIAIJ, 2780 MatCopy_MPIAIJ, 2781 /*44*/ MatGetRowMax_MPIAIJ, 2782 MatScale_MPIAIJ, 2783 MatShift_MPIAIJ, 2784 MatDiagonalSet_MPIAIJ, 2785 MatZeroRowsColumns_MPIAIJ, 2786 /*49*/ MatSetRandom_MPIAIJ, 2787 MatGetRowIJ_MPIAIJ, 2788 MatRestoreRowIJ_MPIAIJ, 2789 NULL, 2790 NULL, 2791 /*54*/ MatFDColoringCreate_MPIXAIJ, 2792 NULL, 2793 MatSetUnfactored_MPIAIJ, 2794 MatPermute_MPIAIJ, 2795 NULL, 2796 /*59*/ MatCreateSubMatrix_MPIAIJ, 2797 MatDestroy_MPIAIJ, 2798 MatView_MPIAIJ, 2799 NULL, 2800 NULL, 2801 /*64*/ NULL, 2802 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2807 MatGetRowMinAbs_MPIAIJ, 2808 NULL, 2809 NULL, 2810 NULL, 2811 NULL, 2812 /*75*/ MatFDColoringApply_AIJ, 2813 MatSetFromOptions_MPIAIJ, 2814 NULL, 2815 NULL, 2816 MatFindZeroDiagonals_MPIAIJ, 2817 /*80*/ NULL, 2818 NULL, 2819 NULL, 2820 /*83*/ MatLoad_MPIAIJ, 2821 MatIsSymmetric_MPIAIJ, 2822 NULL, 2823 NULL, 2824 NULL, 2825 NULL, 2826 /*89*/ NULL, 2827 NULL, 2828 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2829 NULL, 2830 NULL, 2831 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2832 NULL, 2833 NULL, 2834 NULL, 2835 MatBindToCPU_MPIAIJ, 2836 /*99*/ MatProductSetFromOptions_MPIAIJ, 2837 NULL, 2838 NULL, 2839 MatConjugate_MPIAIJ, 2840 NULL, 2841 /*104*/ 
MatSetValuesRow_MPIAIJ, 2842 MatRealPart_MPIAIJ, 2843 MatImaginaryPart_MPIAIJ, 2844 NULL, 2845 NULL, 2846 /*109*/ NULL, 2847 NULL, 2848 MatGetRowMin_MPIAIJ, 2849 NULL, 2850 MatMissingDiagonal_MPIAIJ, 2851 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2852 NULL, 2853 MatGetGhosts_MPIAIJ, 2854 NULL, 2855 NULL, 2856 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2857 NULL, 2858 NULL, 2859 NULL, 2860 MatGetMultiProcBlock_MPIAIJ, 2861 /*124*/ MatFindNonzeroRows_MPIAIJ, 2862 MatGetColumnReductions_MPIAIJ, 2863 MatInvertBlockDiagonal_MPIAIJ, 2864 MatInvertVariableBlockDiagonal_MPIAIJ, 2865 MatCreateSubMatricesMPI_MPIAIJ, 2866 /*129*/ NULL, 2867 NULL, 2868 NULL, 2869 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2870 NULL, 2871 /*134*/ NULL, 2872 NULL, 2873 NULL, 2874 NULL, 2875 NULL, 2876 /*139*/ MatSetBlockSizes_MPIAIJ, 2877 NULL, 2878 NULL, 2879 MatFDColoringSetUp_MPIXAIJ, 2880 MatFindOffBlockDiagonalEntries_MPIAIJ, 2881 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2882 /*145*/ NULL, 2883 NULL, 2884 NULL, 2885 MatCreateGraph_Simple_AIJ, 2886 NULL, 2887 /*150*/ NULL, 2888 MatEliminateZeros_MPIAIJ}; 2889 2890 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2891 { 2892 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2893 2894 PetscFunctionBegin; 2895 PetscCall(MatStoreValues(aij->A)); 2896 PetscCall(MatStoreValues(aij->B)); 2897 PetscFunctionReturn(PETSC_SUCCESS); 2898 } 2899 2900 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2901 { 2902 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2903 2904 PetscFunctionBegin; 2905 PetscCall(MatRetrieveValues(aij->A)); 2906 PetscCall(MatRetrieveValues(aij->B)); 2907 PetscFunctionReturn(PETSC_SUCCESS); 2908 } 2909 2910 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2911 { 2912 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2913 PetscMPIInt size; 2914 2915 PetscFunctionBegin; 2916 if (B->hash_active) { 2917 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2918 B->hash_active = 
PETSC_FALSE; 2919 } 2920 PetscCall(PetscLayoutSetUp(B->rmap)); 2921 PetscCall(PetscLayoutSetUp(B->cmap)); 2922 2923 #if defined(PETSC_USE_CTABLE) 2924 PetscCall(PetscHMapIDestroy(&b->colmap)); 2925 #else 2926 PetscCall(PetscFree(b->colmap)); 2927 #endif 2928 PetscCall(PetscFree(b->garray)); 2929 PetscCall(VecDestroy(&b->lvec)); 2930 PetscCall(VecScatterDestroy(&b->Mvctx)); 2931 2932 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2933 PetscCall(MatDestroy(&b->B)); 2934 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2935 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2936 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2937 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2938 2939 PetscCall(MatDestroy(&b->A)); 2940 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2941 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2942 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2943 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2944 2945 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2946 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2947 B->preallocated = PETSC_TRUE; 2948 B->was_assembled = PETSC_FALSE; 2949 B->assembled = PETSC_FALSE; 2950 PetscFunctionReturn(PETSC_SUCCESS); 2951 } 2952 2953 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2954 { 2955 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2956 2957 PetscFunctionBegin; 2958 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2959 PetscCall(PetscLayoutSetUp(B->rmap)); 2960 PetscCall(PetscLayoutSetUp(B->cmap)); 2961 2962 #if defined(PETSC_USE_CTABLE) 2963 PetscCall(PetscHMapIDestroy(&b->colmap)); 2964 #else 2965 PetscCall(PetscFree(b->colmap)); 2966 #endif 2967 PetscCall(PetscFree(b->garray)); 2968 PetscCall(VecDestroy(&b->lvec)); 2969 PetscCall(VecScatterDestroy(&b->Mvctx)); 2970 2971 PetscCall(MatResetPreallocation(b->A)); 2972 PetscCall(MatResetPreallocation(b->B)); 2973 B->preallocated = PETSC_TRUE; 2974 
B->was_assembled = PETSC_FALSE; 2975 B->assembled = PETSC_FALSE; 2976 PetscFunctionReturn(PETSC_SUCCESS); 2977 } 2978 2979 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2980 { 2981 Mat mat; 2982 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2983 2984 PetscFunctionBegin; 2985 *newmat = NULL; 2986 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2987 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2988 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2989 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2990 a = (Mat_MPIAIJ *)mat->data; 2991 2992 mat->factortype = matin->factortype; 2993 mat->assembled = matin->assembled; 2994 mat->insertmode = NOT_SET_VALUES; 2995 mat->preallocated = matin->preallocated; 2996 2997 a->size = oldmat->size; 2998 a->rank = oldmat->rank; 2999 a->donotstash = oldmat->donotstash; 3000 a->roworiented = oldmat->roworiented; 3001 a->rowindices = NULL; 3002 a->rowvalues = NULL; 3003 a->getrowactive = PETSC_FALSE; 3004 3005 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3006 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3007 3008 if (oldmat->colmap) { 3009 #if defined(PETSC_USE_CTABLE) 3010 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3011 #else 3012 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3013 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3014 #endif 3015 } else a->colmap = NULL; 3016 if (oldmat->garray) { 3017 PetscInt len; 3018 len = oldmat->B->cmap->n; 3019 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3020 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3021 } else a->garray = NULL; 3022 3023 /* It may happen MatDuplicate is called with a non-assembled matrix 3024 In fact, MatDuplicate only requires the matrix to be preallocated 3025 This may happen inside a DMCreateMatrix_Shell */ 3026 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3027 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3028 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3029 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3030 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3031 *newmat = mat; 3032 PetscFunctionReturn(PETSC_SUCCESS); 3033 } 3034 3035 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3036 { 3037 PetscBool isbinary, ishdf5; 3038 3039 PetscFunctionBegin; 3040 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3041 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3042 /* force binary viewer to load .info file if it has not yet done so */ 3043 PetscCall(PetscViewerSetUp(viewer)); 3044 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3045 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3046 if (isbinary) { 3047 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3048 } else if (ishdf5) { 3049 #if defined(PETSC_HAVE_HDF5) 3050 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3051 #else 3052 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3053 #endif 3054 } else { 3055 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3056 } 3057 PetscFunctionReturn(PETSC_SUCCESS); 3058 } 3059 3060 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3061 { 3062 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3063 PetscInt *rowidxs, *colidxs; 3064 PetscScalar *matvals; 3065 3066 PetscFunctionBegin; 3067 PetscCall(PetscViewerSetUp(viewer)); 3068 3069 /* read in matrix header */ 3070 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3071 PetscCheck(header[0] 
== MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3072 M = header[1]; 3073 N = header[2]; 3074 nz = header[3]; 3075 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3076 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3077 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3078 3079 /* set block sizes from the viewer's .info file */ 3080 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3081 /* set global sizes if not set already */ 3082 if (mat->rmap->N < 0) mat->rmap->N = M; 3083 if (mat->cmap->N < 0) mat->cmap->N = N; 3084 PetscCall(PetscLayoutSetUp(mat->rmap)); 3085 PetscCall(PetscLayoutSetUp(mat->cmap)); 3086 3087 /* check if the matrix sizes are correct */ 3088 PetscCall(MatGetSize(mat, &rows, &cols)); 3089 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3090 3091 /* read in row lengths and build row indices */ 3092 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3093 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3094 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3095 rowidxs[0] = 0; 3096 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3097 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3098 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3099 /* read in column indices and matrix values */ 
3100 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3101 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3102 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3103 /* store matrix indices and values */ 3104 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3105 PetscCall(PetscFree(rowidxs)); 3106 PetscCall(PetscFree2(colidxs, matvals)); 3107 PetscFunctionReturn(PETSC_SUCCESS); 3108 } 3109 3110 /* Not scalable because of ISAllGather() unless getting all columns. */ 3111 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3112 { 3113 IS iscol_local; 3114 PetscBool isstride; 3115 PetscMPIInt lisstride = 0, gisstride; 3116 3117 PetscFunctionBegin; 3118 /* check if we are grabbing all columns*/ 3119 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3120 3121 if (isstride) { 3122 PetscInt start, len, mstart, mlen; 3123 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3124 PetscCall(ISGetLocalSize(iscol, &len)); 3125 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3126 if (mstart == start && mlen - mstart == len) lisstride = 1; 3127 } 3128 3129 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3130 if (gisstride) { 3131 PetscInt N; 3132 PetscCall(MatGetSize(mat, NULL, &N)); 3133 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3134 PetscCall(ISSetIdentity(iscol_local)); 3135 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3136 } else { 3137 PetscInt cbs; 3138 PetscCall(ISGetBlockSize(iscol, &cbs)); 3139 PetscCall(ISAllGather(iscol, &iscol_local)); 3140 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3141 } 3142 3143 *isseq = iscol_local; 3144 PetscFunctionReturn(PETSC_SUCCESS); 3145 } 3146 3147 /* 3148 Used by 
MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3149 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3150 3151 Input Parameters: 3152 + mat - matrix 3153 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3154 i.e., mat->rstart <= isrow[i] < mat->rend 3155 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3156 i.e., mat->cstart <= iscol[i] < mat->cend 3157 3158 Output Parameters: 3159 + isrow_d - sequential row index set for retrieving mat->A 3160 . iscol_d - sequential column index set for retrieving mat->A 3161 . iscol_o - sequential column index set for retrieving mat->B 3162 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3163 */ 3164 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3165 { 3166 Vec x, cmap; 3167 const PetscInt *is_idx; 3168 PetscScalar *xarray, *cmaparray; 3169 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3170 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3171 Mat B = a->B; 3172 Vec lvec = a->lvec, lcmap; 3173 PetscInt i, cstart, cend, Bn = B->cmap->N; 3174 MPI_Comm comm; 3175 VecScatter Mvctx = a->Mvctx; 3176 3177 PetscFunctionBegin; 3178 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3179 PetscCall(ISGetLocalSize(iscol, &ncols)); 3180 3181 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3182 PetscCall(MatCreateVecs(mat, &x, NULL)); 3183 PetscCall(VecSet(x, -1.0)); 3184 PetscCall(VecDuplicate(x, &cmap)); 3185 PetscCall(VecSet(cmap, -1.0)); 3186 3187 /* Get start indices */ 3188 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3189 isstart -= ncols; 3190 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3191 3192 PetscCall(ISGetIndices(iscol, &is_idx)); 3193 PetscCall(VecGetArray(x, &xarray)); 3194 PetscCall(VecGetArray(cmap, &cmaparray)); 3195 PetscCall(PetscMalloc1(ncols, &idx)); 3196 for (i = 0; i < ncols; i++) { 3197 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3198 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3199 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3200 } 3201 PetscCall(VecRestoreArray(x, &xarray)); 3202 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3203 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3204 3205 /* Get iscol_d */ 3206 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3207 PetscCall(ISGetBlockSize(iscol, &i)); 3208 PetscCall(ISSetBlockSize(*iscol_d, i)); 3209 3210 /* Get isrow_d */ 3211 PetscCall(ISGetLocalSize(isrow, &m)); 3212 rstart = mat->rmap->rstart; 3213 PetscCall(PetscMalloc1(m, &idx)); 3214 PetscCall(ISGetIndices(isrow, &is_idx)); 3215 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3216 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3217 3218 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3219 PetscCall(ISGetBlockSize(isrow, &i)); 3220 PetscCall(ISSetBlockSize(*isrow_d, i)); 3221 3222 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3223 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3224 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3225 3226 PetscCall(VecDuplicate(lvec, &lcmap)); 3227 3228 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3229 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3230 3231 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3232 /* off-process column indices */ 3233 count = 0; 3234 PetscCall(PetscMalloc1(Bn, &idx)); 3235 PetscCall(PetscMalloc1(Bn, &cmap1)); 3236 3237 PetscCall(VecGetArray(lvec, &xarray)); 3238 PetscCall(VecGetArray(lcmap, &cmaparray)); 3239 for (i = 0; i < Bn; i++) { 3240 if (PetscRealPart(xarray[i]) > -1.0) { 3241 idx[count] = i; /* local column index in off-diagonal part B */ 3242 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3243 count++; 3244 } 3245 } 3246 PetscCall(VecRestoreArray(lvec, &xarray)); 3247 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3248 3249 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3250 /* cannot ensure iscol_o has same blocksize as iscol! */ 3251 3252 PetscCall(PetscFree(idx)); 3253 *garray = cmap1; 3254 3255 PetscCall(VecDestroy(&x)); 3256 PetscCall(VecDestroy(&cmap)); 3257 PetscCall(VecDestroy(&lcmap)); 3258 PetscFunctionReturn(PETSC_SUCCESS); 3259 } 3260 3261 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3262 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3263 { 3264 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3265 Mat M = NULL; 3266 MPI_Comm comm; 3267 IS iscol_d, isrow_d, iscol_o; 3268 Mat Asub = NULL, Bsub = NULL; 3269 PetscInt n; 3270 3271 PetscFunctionBegin; 3272 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3273 3274 if (call == MAT_REUSE_MATRIX) { 3275 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3277 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3280 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3281 3282 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3283 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3284 3285 /* Update diagonal and off-diagonal portions of submat */ 3286 asub = (Mat_MPIAIJ *)(*submat)->data; 3287 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3288 PetscCall(ISGetLocalSize(iscol_o, &n)); 3289 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3290 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3291 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3292 3293 } else { /* call == MAT_INITIAL_MATRIX) */ 3294 const PetscInt *garray; 3295 PetscInt BsubN; 3296 3297 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3298 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3299 3300 /* Create local submatrices Asub and Bsub */ 3301 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3302 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3303 3304 /* Create submatrix M */ 3305 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3306 3307 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3308 asub = (Mat_MPIAIJ *)M->data; 3309 3310 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3311 n = asub->B->cmap->N; 3312 if (BsubN > n) { 3313 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3314 const PetscInt *idx; 3315 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3316 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3317 3318 PetscCall(PetscMalloc1(n, &idx_new)); 3319 j = 0; 3320 PetscCall(ISGetIndices(iscol_o, &idx)); 3321 for (i = 0; i < n; i++) { 3322 if (j >= BsubN) break; 3323 while (subgarray[i] > garray[j]) j++; 3324 3325 if (subgarray[i] == garray[j]) { 3326 idx_new[i] = idx[j++]; 3327 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3328 } 3329 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3330 3331 PetscCall(ISDestroy(&iscol_o)); 3332 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3333 3334 } else if (BsubN < n) { 3335 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3336 } 3337 3338 PetscCall(PetscFree(garray)); 3339 *submat = M; 3340 3341 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */ 3342 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3343 PetscCall(ISDestroy(&isrow_d)); 3344 3345 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3346 PetscCall(ISDestroy(&iscol_d)); 3347 3348 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3349 PetscCall(ISDestroy(&iscol_o)); 3350 } 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3355 { 3356 IS iscol_local = NULL, isrow_d; 3357 PetscInt csize; 3358 PetscInt n, i, j, start, end; 3359 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3360 MPI_Comm comm; 3361 3362 PetscFunctionBegin; 3363 /* If isrow has same processor distribution as mat, 3364 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3365 if (call == MAT_REUSE_MATRIX) { 3366 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3367 if (isrow_d) { 3368 sameRowDist = PETSC_TRUE; 3369 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3370 } else { 3371 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3372 if (iscol_local) { 3373 sameRowDist = PETSC_TRUE; 3374 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3375 } 3376 } 3377 } else { 3378 /* Check if isrow has same processor distribution as mat */ 3379 sameDist[0] = PETSC_FALSE; 3380 PetscCall(ISGetLocalSize(isrow, &n)); 3381 if (!n) { 3382 sameDist[0] = PETSC_TRUE; 3383 } else { 3384 PetscCall(ISGetMinMax(isrow, &i, &j)); 3385 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3386 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3387 } 3388 3389 /* Check if iscol has same processor distribution as mat */ 3390 sameDist[1] = PETSC_FALSE; 3391 PetscCall(ISGetLocalSize(iscol, &n)); 3392 if (!n) { 3393 sameDist[1] 
= PETSC_TRUE; 3394 } else { 3395 PetscCall(ISGetMinMax(iscol, &i, &j)); 3396 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3397 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3398 } 3399 3400 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3401 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3402 sameRowDist = tsameDist[0]; 3403 } 3404 3405 if (sameRowDist) { 3406 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3407 /* isrow and iscol have same processor distribution as mat */ 3408 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } else { /* sameRowDist */ 3411 /* isrow has same processor distribution as mat */ 3412 if (call == MAT_INITIAL_MATRIX) { 3413 PetscBool sorted; 3414 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3415 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3416 PetscCall(ISGetSize(iscol, &i)); 3417 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3418 3419 PetscCall(ISSorted(iscol_local, &sorted)); 3420 if (sorted) { 3421 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3422 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3423 PetscFunctionReturn(PETSC_SUCCESS); 3424 } 3425 } else { /* call == MAT_REUSE_MATRIX */ 3426 IS iscol_sub; 3427 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3428 if (iscol_sub) { 3429 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 } 3433 } 3434 } 3435 3436 /* General case: iscol -> iscol_local which has global size of iscol */ 3437 if (call == MAT_REUSE_MATRIX) { 3438 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3439 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3440 } else { 3441 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3442 } 3443 3444 PetscCall(ISGetLocalSize(iscol, &csize)); 3445 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3446 3447 if (call == MAT_INITIAL_MATRIX) { 3448 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3449 PetscCall(ISDestroy(&iscol_local)); 3450 } 3451 PetscFunctionReturn(PETSC_SUCCESS); 3452 } 3453 3454 /*@C 3455 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3456 and "off-diagonal" part of the matrix in CSR format. 3457 3458 Collective 3459 3460 Input Parameters: 3461 + comm - MPI communicator 3462 . A - "diagonal" portion of matrix 3463 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3464 - garray - global index of `B` columns 3465 3466 Output Parameter: 3467 . mat - the matrix, with input `A` as its local diagonal matrix 3468 3469 Level: advanced 3470 3471 Notes: 3472 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3473 3474 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 
3475 3476 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3477 @*/ 3478 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3479 { 3480 Mat_MPIAIJ *maij; 3481 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3482 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3483 const PetscScalar *oa; 3484 Mat Bnew; 3485 PetscInt m, n, N; 3486 MatType mpi_mat_type; 3487 3488 PetscFunctionBegin; 3489 PetscCall(MatCreate(comm, mat)); 3490 PetscCall(MatGetSize(A, &m, &n)); 3491 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3492 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3493 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3494 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3495 3496 /* Get global columns of mat */ 3497 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3498 3499 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3500 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3501 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3502 PetscCall(MatSetType(*mat, mpi_mat_type)); 3503 3504 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3505 maij = (Mat_MPIAIJ *)(*mat)->data; 3506 3507 (*mat)->preallocated = PETSC_TRUE; 3508 3509 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3510 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3511 3512 /* Set A as diagonal portion of *mat */ 3513 maij->A = A; 3514 3515 nz = oi[m]; 3516 for (i = 0; i < nz; i++) { 3517 col = oj[i]; 3518 oj[i] = garray[col]; 3519 } 3520 3521 /* Set Bnew as off-diagonal portion of *mat */ 3522 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3523 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3524 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3525 bnew = (Mat_SeqAIJ *)Bnew->data; 3526 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3527 maij->B = Bnew; 3528 3529 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3530 3531 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3532 b->free_a = PETSC_FALSE; 3533 b->free_ij = PETSC_FALSE; 3534 PetscCall(MatDestroy(&B)); 3535 3536 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3537 bnew->free_a = PETSC_TRUE; 3538 bnew->free_ij = PETSC_TRUE; 3539 3540 /* condense columns of maij->B */ 3541 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3542 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3543 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3544 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3545 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3546 PetscFunctionReturn(PETSC_SUCCESS); 3547 } 3548 3549 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3550 3551 PetscErrorCode 
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3552 { 3553 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3554 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3555 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3556 Mat M, Msub, B = a->B; 3557 MatScalar *aa; 3558 Mat_SeqAIJ *aij; 3559 PetscInt *garray = a->garray, *colsub, Ncols; 3560 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3561 IS iscol_sub, iscmap; 3562 const PetscInt *is_idx, *cmap; 3563 PetscBool allcolumns = PETSC_FALSE; 3564 MPI_Comm comm; 3565 3566 PetscFunctionBegin; 3567 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3568 if (call == MAT_REUSE_MATRIX) { 3569 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3570 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3571 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3572 3573 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3574 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3575 3576 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3577 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3578 3579 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3580 3581 } else { /* call == MAT_INITIAL_MATRIX) */ 3582 PetscBool flg; 3583 3584 PetscCall(ISGetLocalSize(iscol, &n)); 3585 PetscCall(ISGetSize(iscol, &Ncols)); 3586 3587 /* (1) iscol -> nonscalable iscol_local */ 3588 /* Check for special case: each processor gets entire matrix columns */ 3589 PetscCall(ISIdentity(iscol_local, &flg)); 3590 if (flg && n == mat->cmap->N) allcolumns = 
PETSC_TRUE; 3591 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3592 if (allcolumns) { 3593 iscol_sub = iscol_local; 3594 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3595 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3596 3597 } else { 3598 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3599 PetscInt *idx, *cmap1, k; 3600 PetscCall(PetscMalloc1(Ncols, &idx)); 3601 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3602 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3603 count = 0; 3604 k = 0; 3605 for (i = 0; i < Ncols; i++) { 3606 j = is_idx[i]; 3607 if (j >= cstart && j < cend) { 3608 /* diagonal part of mat */ 3609 idx[count] = j; 3610 cmap1[count++] = i; /* column index in submat */ 3611 } else if (Bn) { 3612 /* off-diagonal part of mat */ 3613 if (j == garray[k]) { 3614 idx[count] = j; 3615 cmap1[count++] = i; /* column index in submat */ 3616 } else if (j > garray[k]) { 3617 while (j > garray[k] && k < Bn - 1) k++; 3618 if (j == garray[k]) { 3619 idx[count] = j; 3620 cmap1[count++] = i; /* column index in submat */ 3621 } 3622 } 3623 } 3624 } 3625 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3626 3627 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3628 PetscCall(ISGetBlockSize(iscol, &cbs)); 3629 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3630 3631 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3632 } 3633 3634 /* (3) Create sequential Msub */ 3635 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3636 } 3637 3638 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3639 aij = (Mat_SeqAIJ *)(Msub)->data; 3640 ii = aij->i; 3641 PetscCall(ISGetIndices(iscmap, &cmap)); 3642 3643 /* 3644 m - number of local rows 3645 Ncols 
- number of columns (same on all processors) 3646 rstart - first row in new global matrix generated 3647 */ 3648 PetscCall(MatGetSize(Msub, &m, NULL)); 3649 3650 if (call == MAT_INITIAL_MATRIX) { 3651 /* (4) Create parallel newmat */ 3652 PetscMPIInt rank, size; 3653 PetscInt csize; 3654 3655 PetscCallMPI(MPI_Comm_size(comm, &size)); 3656 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3657 3658 /* 3659 Determine the number of non-zeros in the diagonal and off-diagonal 3660 portions of the matrix in order to do correct preallocation 3661 */ 3662 3663 /* first get start and end of "diagonal" columns */ 3664 PetscCall(ISGetLocalSize(iscol, &csize)); 3665 if (csize == PETSC_DECIDE) { 3666 PetscCall(ISGetSize(isrow, &mglobal)); 3667 if (mglobal == Ncols) { /* square matrix */ 3668 nlocal = m; 3669 } else { 3670 nlocal = Ncols / size + ((Ncols % size) > rank); 3671 } 3672 } else { 3673 nlocal = csize; 3674 } 3675 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3676 rstart = rend - nlocal; 3677 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3678 3679 /* next, compute all the lengths */ 3680 jj = aij->j; 3681 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3682 olens = dlens + m; 3683 for (i = 0; i < m; i++) { 3684 jend = ii[i + 1] - ii[i]; 3685 olen = 0; 3686 dlen = 0; 3687 for (j = 0; j < jend; j++) { 3688 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3689 else dlen++; 3690 jj++; 3691 } 3692 olens[i] = olen; 3693 dlens[i] = dlen; 3694 } 3695 3696 PetscCall(ISGetBlockSize(isrow, &bs)); 3697 PetscCall(ISGetBlockSize(iscol, &cbs)); 3698 3699 PetscCall(MatCreate(comm, &M)); 3700 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3701 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3702 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3703 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3704 
PetscCall(PetscFree(dlens)); 3705 3706 } else { /* call == MAT_REUSE_MATRIX */ 3707 M = *newmat; 3708 PetscCall(MatGetLocalSize(M, &i, NULL)); 3709 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3710 PetscCall(MatZeroEntries(M)); 3711 /* 3712 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3713 rather than the slower MatSetValues(). 3714 */ 3715 M->was_assembled = PETSC_TRUE; 3716 M->assembled = PETSC_FALSE; 3717 } 3718 3719 /* (5) Set values of Msub to *newmat */ 3720 PetscCall(PetscMalloc1(count, &colsub)); 3721 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3722 3723 jj = aij->j; 3724 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3725 for (i = 0; i < m; i++) { 3726 row = rstart + i; 3727 nz = ii[i + 1] - ii[i]; 3728 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3729 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3730 jj += nz; 3731 aa += nz; 3732 } 3733 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3734 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3735 3736 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3737 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3738 3739 PetscCall(PetscFree(colsub)); 3740 3741 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3742 if (call == MAT_INITIAL_MATRIX) { 3743 *newmat = M; 3744 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3745 PetscCall(MatDestroy(&Msub)); 3746 3747 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3748 PetscCall(ISDestroy(&iscol_sub)); 3749 3750 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3751 PetscCall(ISDestroy(&iscmap)); 3752 3753 if (iscol_local) { 3754 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3755 
PetscCall(ISDestroy(&iscol_local)); 3756 } 3757 } 3758 PetscFunctionReturn(PETSC_SUCCESS); 3759 } 3760 3761 /* 3762 Not great since it makes two copies of the submatrix, first an SeqAIJ 3763 in local and then by concatenating the local matrices the end result. 3764 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3765 3766 This requires a sequential iscol with all indices. 3767 */ 3768 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3769 { 3770 PetscMPIInt rank, size; 3771 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3772 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3773 Mat M, Mreuse; 3774 MatScalar *aa, *vwork; 3775 MPI_Comm comm; 3776 Mat_SeqAIJ *aij; 3777 PetscBool colflag, allcolumns = PETSC_FALSE; 3778 3779 PetscFunctionBegin; 3780 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3781 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3782 PetscCallMPI(MPI_Comm_size(comm, &size)); 3783 3784 /* Check for special case: each processor gets entire matrix columns */ 3785 PetscCall(ISIdentity(iscol, &colflag)); 3786 PetscCall(ISGetLocalSize(iscol, &n)); 3787 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3788 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3789 3790 if (call == MAT_REUSE_MATRIX) { 3791 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3792 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3793 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3794 } else { 3795 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3796 } 3797 3798 /* 3799 m - number of local rows 3800 n - number of columns (same on 
all processors) 3801 rstart - first row in new global matrix generated 3802 */ 3803 PetscCall(MatGetSize(Mreuse, &m, &n)); 3804 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3805 if (call == MAT_INITIAL_MATRIX) { 3806 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3807 ii = aij->i; 3808 jj = aij->j; 3809 3810 /* 3811 Determine the number of non-zeros in the diagonal and off-diagonal 3812 portions of the matrix in order to do correct preallocation 3813 */ 3814 3815 /* first get start and end of "diagonal" columns */ 3816 if (csize == PETSC_DECIDE) { 3817 PetscCall(ISGetSize(isrow, &mglobal)); 3818 if (mglobal == n) { /* square matrix */ 3819 nlocal = m; 3820 } else { 3821 nlocal = n / size + ((n % size) > rank); 3822 } 3823 } else { 3824 nlocal = csize; 3825 } 3826 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3827 rstart = rend - nlocal; 3828 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3829 3830 /* next, compute all the lengths */ 3831 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3832 olens = dlens + m; 3833 for (i = 0; i < m; i++) { 3834 jend = ii[i + 1] - ii[i]; 3835 olen = 0; 3836 dlen = 0; 3837 for (j = 0; j < jend; j++) { 3838 if (*jj < rstart || *jj >= rend) olen++; 3839 else dlen++; 3840 jj++; 3841 } 3842 olens[i] = olen; 3843 dlens[i] = dlen; 3844 } 3845 PetscCall(MatCreate(comm, &M)); 3846 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3847 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3848 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3849 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3850 PetscCall(PetscFree(dlens)); 3851 } else { 3852 PetscInt ml, nl; 3853 3854 M = *newmat; 3855 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3856 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3857 PetscCall(MatZeroEntries(M)); 3858 /* 
3859 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3860 rather than the slower MatSetValues(). 3861 */ 3862 M->was_assembled = PETSC_TRUE; 3863 M->assembled = PETSC_FALSE; 3864 } 3865 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3866 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3867 ii = aij->i; 3868 jj = aij->j; 3869 3870 /* trigger copy to CPU if needed */ 3871 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3872 for (i = 0; i < m; i++) { 3873 row = rstart + i; 3874 nz = ii[i + 1] - ii[i]; 3875 cwork = jj; 3876 jj += nz; 3877 vwork = aa; 3878 aa += nz; 3879 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3880 } 3881 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3882 3883 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3884 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3885 *newmat = M; 3886 3887 /* save submatrix used in processor for next request */ 3888 if (call == MAT_INITIAL_MATRIX) { 3889 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3890 PetscCall(MatDestroy(&Mreuse)); 3891 } 3892 PetscFunctionReturn(PETSC_SUCCESS); 3893 } 3894 3895 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3896 { 3897 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3898 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3899 const PetscInt *JJ; 3900 PetscBool nooffprocentries; 3901 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3902 3903 PetscFunctionBegin; 3904 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3905 3906 PetscCall(PetscLayoutSetUp(B->rmap)); 3907 PetscCall(PetscLayoutSetUp(B->cmap)); 3908 m = B->rmap->n; 3909 cstart = B->cmap->rstart; 3910 cend = B->cmap->rend; 3911 rstart = B->rmap->rstart; 3912 3913 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3914 3915 if (PetscDefined(USE_DEBUG)) { 
3916 for (i = 0; i < m; i++) { 3917 nnz = Ii[i + 1] - Ii[i]; 3918 JJ = J + Ii[i]; 3919 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3920 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3921 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3922 } 3923 } 3924 3925 for (i = 0; i < m; i++) { 3926 nnz = Ii[i + 1] - Ii[i]; 3927 JJ = J + Ii[i]; 3928 nnz_max = PetscMax(nnz_max, nnz); 3929 d = 0; 3930 for (j = 0; j < nnz; j++) { 3931 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3932 } 3933 d_nnz[i] = d; 3934 o_nnz[i] = nnz - d; 3935 } 3936 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3937 PetscCall(PetscFree2(d_nnz, o_nnz)); 3938 3939 for (i = 0; i < m; i++) { 3940 ii = i + rstart; 3941 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3942 } 3943 nooffprocentries = B->nooffprocentries; 3944 B->nooffprocentries = PETSC_TRUE; 3945 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3946 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3947 B->nooffprocentries = nooffprocentries; 3948 3949 /* count number of entries below block diagonal */ 3950 PetscCall(PetscFree(Aij->ld)); 3951 PetscCall(PetscCalloc1(m, &ld)); 3952 Aij->ld = ld; 3953 for (i = 0; i < m; i++) { 3954 nnz = Ii[i + 1] - Ii[i]; 3955 j = 0; 3956 while (j < nnz && J[j] < cstart) j++; 3957 ld[i] = j; 3958 J += nnz; 3959 } 3960 3961 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3962 PetscFunctionReturn(PETSC_SUCCESS); 3963 } 3964 3965 /*@ 3966 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3967 (the default parallel PETSc format). 3968 3969 Collective 3970 3971 Input Parameters: 3972 + B - the matrix 3973 . i - the indices into j for the start of each local row (starts with zero) 3974 . j - the column indices for each local row (starts with zero) 3975 - v - optional values in the matrix 3976 3977 Level: developer 3978 3979 Notes: 3980 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3981 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3982 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3983 3984 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3985 3986 The format which is used for the sparse matrix input, is equivalent to a 3987 row-major ordering.. 
i.e for the following matrix, the input data expected is 3988 as shown 3989 3990 .vb 3991 1 0 0 3992 2 0 3 P0 3993 ------- 3994 4 5 6 P1 3995 3996 Process0 [P0] rows_owned=[0,1] 3997 i = {0,1,3} [size = nrow+1 = 2+1] 3998 j = {0,0,2} [size = 3] 3999 v = {1,2,3} [size = 3] 4000 4001 Process1 [P1] rows_owned=[2] 4002 i = {0,3} [size = nrow+1 = 1+1] 4003 j = {0,1,2} [size = 3] 4004 v = {4,5,6} [size = 3] 4005 .ve 4006 4007 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 4008 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 4009 @*/ 4010 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4011 { 4012 PetscFunctionBegin; 4013 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4014 PetscFunctionReturn(PETSC_SUCCESS); 4015 } 4016 4017 /*@C 4018 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4019 (the default parallel PETSc format). For good matrix assembly performance 4020 the user should preallocate the matrix storage by setting the parameters 4021 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4022 4023 Collective 4024 4025 Input Parameters: 4026 + B - the matrix 4027 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4028 (same value is used for all local rows) 4029 . d_nnz - array containing the number of nonzeros in the various rows of the 4030 DIAGONAL portion of the local submatrix (possibly different for each row) 4031 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4032 The size of this array is equal to the number of local rows, i.e 'm'. 4033 For matrices that will be factored, you must leave room for (and set) 4034 the diagonal entry even if it is zero. 4035 . 
o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

  Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to.  This is an mxn matrix.  In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* B must be a valid Mat whose type has already been set */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation, if the matrix type composed one under this name */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format.
4155 4156 Collective 4157 4158 Input Parameters: 4159 + comm - MPI communicator 4160 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4161 . n - This value should be the same as the local size used in creating the 4162 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4163 calculated if N is given) For square matrices n is almost always m. 4164 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4165 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4166 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4167 . j - column indices 4168 - a - optional matrix values 4169 4170 Output Parameter: 4171 . mat - the matrix 4172 4173 Level: intermediate 4174 4175 Notes: 4176 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4177 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4178 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4179 4180 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4181 4182 The format which is used for the sparse matrix input, is equivalent to a 4183 row-major ordering.. 
i.e for the following matrix, the input data expected is 4184 as shown 4185 4186 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4187 .vb 4188 1 0 0 4189 2 0 3 P0 4190 ------- 4191 4 5 6 P1 4192 4193 Process0 [P0] rows_owned=[0,1] 4194 i = {0,1,3} [size = nrow+1 = 2+1] 4195 j = {0,0,2} [size = 3] 4196 v = {1,2,3} [size = 3] 4197 4198 Process1 [P1] rows_owned=[2] 4199 i = {0,3} [size = nrow+1 = 1+1] 4200 j = {0,1,2} [size = 3] 4201 v = {4,5,6} [size = 3] 4202 .ve 4203 4204 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4205 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4206 @*/ 4207 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4208 { 4209 PetscFunctionBegin; 4210 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4211 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4212 PetscCall(MatCreate(comm, mat)); 4213 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4214 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4215 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4216 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4217 PetscFunctionReturn(PETSC_SUCCESS); 4218 } 4219 4220 /*@ 4221 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4222 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4223 from `MatCreateMPIAIJWithArrays()` 4224 4225 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4226 4227 Collective 4228 4229 Input Parameters: 4230 + mat - the matrix 4231 . 
m - number of local rows (Cannot be `PETSC_DECIDE`) 4232 . n - This value should be the same as the local size used in creating the 4233 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4234 calculated if N is given) For square matrices n is almost always m. 4235 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4236 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4237 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4238 . J - column indices 4239 - v - matrix values 4240 4241 Level: deprecated 4242 4243 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4244 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4245 @*/ 4246 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4247 { 4248 PetscInt nnz, i; 4249 PetscBool nooffprocentries; 4250 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4251 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4252 PetscScalar *ad, *ao; 4253 PetscInt ldi, Iii, md; 4254 const PetscInt *Adi = Ad->i; 4255 PetscInt *ld = Aij->ld; 4256 4257 PetscFunctionBegin; 4258 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4259 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4260 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4261 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to 
MatUpdateMPIAIJWithArrays()"); 4262 4263 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4264 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4265 4266 for (i = 0; i < m; i++) { 4267 nnz = Ii[i + 1] - Ii[i]; 4268 Iii = Ii[i]; 4269 ldi = ld[i]; 4270 md = Adi[i + 1] - Adi[i]; 4271 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4272 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4273 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4274 ad += md; 4275 ao += nnz - md; 4276 } 4277 nooffprocentries = mat->nooffprocentries; 4278 mat->nooffprocentries = PETSC_TRUE; 4279 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4280 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4281 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4282 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4283 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4284 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4285 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4286 mat->nooffprocentries = nooffprocentries; 4287 PetscFunctionReturn(PETSC_SUCCESS); 4288 } 4289 4290 /*@ 4291 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4292 4293 Collective 4294 4295 Input Parameters: 4296 + mat - the matrix 4297 - v - matrix values, stored by row 4298 4299 Level: intermediate 4300 4301 Note: 4302 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4303 4304 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4305 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4306 @*/ 4307 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4308 { 4309 PetscInt nnz, i, m; 4310 PetscBool nooffprocentries; 4311 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4312 
Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4313 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4314 PetscScalar *ad, *ao; 4315 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4316 PetscInt ldi, Iii, md; 4317 PetscInt *ld = Aij->ld; 4318 4319 PetscFunctionBegin; 4320 m = mat->rmap->n; 4321 4322 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4323 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4324 Iii = 0; 4325 for (i = 0; i < m; i++) { 4326 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4327 ldi = ld[i]; 4328 md = Adi[i + 1] - Adi[i]; 4329 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4330 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4331 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4332 ad += md; 4333 ao += nnz - md; 4334 Iii += nnz; 4335 } 4336 nooffprocentries = mat->nooffprocentries; 4337 mat->nooffprocentries = PETSC_TRUE; 4338 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4339 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4340 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4341 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4342 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4343 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4344 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4345 mat->nooffprocentries = nooffprocentries; 4346 PetscFunctionReturn(PETSC_SUCCESS); 4347 } 4348 4349 /*@C 4350 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4351 (the default parallel PETSc format). For good matrix assembly performance 4352 the user should preallocate the matrix storage by setting the parameters 4353 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4354 4355 Collective 4356 4357 Input Parameters: 4358 + comm - MPI communicator 4359 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4360 This value should be the same as the local size used in creating the 4361 y vector for the matrix-vector product y = Ax. 4362 . 
n - This value should be the same as the local size used in creating the 4363 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4364 calculated if N is given) For square matrices n is almost always m. 4365 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4366 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4367 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4368 (same value is used for all local rows) 4369 . d_nnz - array containing the number of nonzeros in the various rows of the 4370 DIAGONAL portion of the local submatrix (possibly different for each row) 4371 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4372 The size of this array is equal to the number of local rows, i.e 'm'. 4373 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4374 submatrix (same value is used for all local rows). 4375 - o_nnz - array containing the number of nonzeros in the various rows of the 4376 OFF-DIAGONAL portion of the local submatrix (possibly different for 4377 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4378 structure. The size of this array is equal to the number 4379 of local rows, i.e 'm'. 4380 4381 Output Parameter: 4382 . A - the matrix 4383 4384 Options Database Keys: 4385 + -mat_no_inode - Do not use inodes 4386 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4387 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4388 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4389 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 

  Level: intermediate

  Notes:
  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2 etc.. where
  m0,m1,m2,.. are the input parameter 'm'. i.e. each processor stores
  values corresponding to an [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e. the diagonal matrix on
  process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitutes the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e. 34, and
  hence pre-allocation is perfect.
4517 4518 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4519 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4520 @*/ 4521 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4522 { 4523 PetscMPIInt size; 4524 4525 PetscFunctionBegin; 4526 PetscCall(MatCreate(comm, A)); 4527 PetscCall(MatSetSizes(*A, m, n, M, N)); 4528 PetscCallMPI(MPI_Comm_size(comm, &size)); 4529 if (size > 1) { 4530 PetscCall(MatSetType(*A, MATMPIAIJ)); 4531 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4532 } else { 4533 PetscCall(MatSetType(*A, MATSEQAIJ)); 4534 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4535 } 4536 PetscFunctionReturn(PETSC_SUCCESS); 4537 } 4538 4539 /*MC 4540 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4541 4542 Synopsis: 4543 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4544 4545 Not Collective 4546 4547 Input Parameter: 4548 . A - the `MATMPIAIJ` matrix 4549 4550 Output Parameters: 4551 + Ad - the diagonal portion of the matrix 4552 . Ao - the off diagonal portion of the matrix 4553 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4554 - ierr - error code 4555 4556 Level: advanced 4557 4558 Note: 4559 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4560 4561 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4562 M*/ 4563 4564 /*MC 4565 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4566 4567 Synopsis: 4568 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4569 4570 Not Collective 4571 4572 Input Parameters: 4573 + A - the `MATMPIAIJ` matrix 4574 . Ad - the diagonal portion of the matrix 4575 . Ao - the off diagonal portion of the matrix 4576 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4577 - ierr - error code 4578 4579 Level: advanced 4580 4581 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4582 M*/ 4583 4584 /*@C 4585 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4586 4587 Not Collective 4588 4589 Input Parameter: 4590 . A - The `MATMPIAIJ` matrix 4591 4592 Output Parameters: 4593 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4594 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4595 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4596 4597 Level: intermediate 4598 4599 Note: 4600 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4601 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4602 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4603 local column numbers to global column numbers in the original matrix. 4604 4605 Fortran Note: 4606 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4607 4608 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4609 @*/ 4610 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4611 { 4612 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4613 PetscBool flg; 4614 4615 PetscFunctionBegin; 4616 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4617 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4618 if (Ad) *Ad = a->A; 4619 if (Ao) *Ao = a->B; 4620 if (colmap) *colmap = a->garray; 4621 PetscFunctionReturn(PETSC_SUCCESS); 4622 } 4623 4624 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4625 { 4626 PetscInt m, N, i, rstart, nnz, Ii; 4627 PetscInt *indx; 4628 PetscScalar *values; 4629 MatType rootType; 4630 4631 PetscFunctionBegin; 4632 PetscCall(MatGetSize(inmat, &m, &N)); 4633 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4634 PetscInt *dnz, *onz, sum, bs, cbs; 4635 4636 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4637 /* Check sum(n) = N */ 4638 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4639 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4640 4641 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4642 rstart -= m; 4643 4644 MatPreallocateBegin(comm, m, n, dnz, onz); 4645 for (i = 0; i < m; i++) { 4646 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4647 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4648 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4649 } 4650 4651 PetscCall(MatCreate(comm, outmat)); 4652 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4653 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4654 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4655 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4656 PetscCall(MatSetType(*outmat, rootType)); 4657 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4658 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4659 MatPreallocateEnd(dnz, onz); 4660 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4661 } 4662 4663 /* numeric phase */ 4664 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4665 for (i = 0; i < m; i++) { 4666 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4667 Ii = i + rstart; 4668 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4669 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4670 } 4671 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4672 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4673 PetscFunctionReturn(PETSC_SUCCESS); 4674 } 4675 4676 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4677 { 4678 PetscMPIInt rank; 4679 PetscInt m, N, i, rstart, nnz; 4680 size_t len; 4681 const PetscInt *indx; 4682 PetscViewer out; 4683 char *name; 4684 Mat B; 4685 const PetscScalar *values; 4686 4687 PetscFunctionBegin; 4688 PetscCall(MatGetLocalSize(A, &m, NULL)); 4689 PetscCall(MatGetSize(A, NULL, &N)); 4690 /* Should this be the type of the diagonal block of A? 
*/ 4691 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4692 PetscCall(MatSetSizes(B, m, N, m, N)); 4693 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4694 PetscCall(MatSetType(B, MATSEQAIJ)); 4695 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4696 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4697 for (i = 0; i < m; i++) { 4698 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4699 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4700 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4701 } 4702 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4703 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4704 4705 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4706 PetscCall(PetscStrlen(outfile, &len)); 4707 PetscCall(PetscMalloc1(len + 6, &name)); 4708 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4709 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4710 PetscCall(PetscFree(name)); 4711 PetscCall(MatView(B, out)); 4712 PetscCall(PetscViewerDestroy(&out)); 4713 PetscCall(MatDestroy(&B)); 4714 PetscFunctionReturn(PETSC_SUCCESS); 4715 } 4716 4717 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4718 { 4719 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4720 4721 PetscFunctionBegin; 4722 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4723 PetscCall(PetscFree(merge->id_r)); 4724 PetscCall(PetscFree(merge->len_s)); 4725 PetscCall(PetscFree(merge->len_r)); 4726 PetscCall(PetscFree(merge->bi)); 4727 PetscCall(PetscFree(merge->bj)); 4728 PetscCall(PetscFree(merge->buf_ri[0])); 4729 PetscCall(PetscFree(merge->buf_ri)); 4730 PetscCall(PetscFree(merge->buf_rj[0])); 4731 PetscCall(PetscFree(merge->buf_rj)); 4732 PetscCall(PetscFree(merge->coi)); 4733 PetscCall(PetscFree(merge->coj)); 4734 PetscCall(PetscFree(merge->owners_co)); 4735 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4736 PetscCall(PetscFree(merge)); 4737 
PetscFunctionReturn(PETSC_SUCCESS); 4738 } 4739 4740 #include <../src/mat/utils/freespace.h> 4741 #include <petscbt.h> 4742 4743 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4744 { 4745 MPI_Comm comm; 4746 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4747 PetscMPIInt size, rank, taga, *len_s; 4748 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4749 PetscInt proc, m; 4750 PetscInt **buf_ri, **buf_rj; 4751 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4752 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4753 MPI_Request *s_waits, *r_waits; 4754 MPI_Status *status; 4755 const MatScalar *aa, *a_a; 4756 MatScalar **abuf_r, *ba_i; 4757 Mat_Merge_SeqsToMPI *merge; 4758 PetscContainer container; 4759 4760 PetscFunctionBegin; 4761 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4762 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4763 4764 PetscCallMPI(MPI_Comm_size(comm, &size)); 4765 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4766 4767 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4768 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4769 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4770 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4771 aa = a_a; 4772 4773 bi = merge->bi; 4774 bj = merge->bj; 4775 buf_ri = merge->buf_ri; 4776 buf_rj = merge->buf_rj; 4777 4778 PetscCall(PetscMalloc1(size, &status)); 4779 owners = merge->rowmap->range; 4780 len_s = merge->len_s; 4781 4782 /* send and recv matrix values */ 4783 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4784 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4785 4786 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4787 for (proc = 0, k = 0; proc < size; proc++) { 4788 if (!len_s[proc]) continue; 4789 i = 
owners[proc]; 4790 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4791 k++; 4792 } 4793 4794 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4795 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4796 PetscCall(PetscFree(status)); 4797 4798 PetscCall(PetscFree(s_waits)); 4799 PetscCall(PetscFree(r_waits)); 4800 4801 /* insert mat values of mpimat */ 4802 PetscCall(PetscMalloc1(N, &ba_i)); 4803 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4804 4805 for (k = 0; k < merge->nrecv; k++) { 4806 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4807 nrows = *(buf_ri_k[k]); 4808 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4809 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4810 } 4811 4812 /* set values of ba */ 4813 m = merge->rowmap->n; 4814 for (i = 0; i < m; i++) { 4815 arow = owners[rank] + i; 4816 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4817 bnzi = bi[i + 1] - bi[i]; 4818 PetscCall(PetscArrayzero(ba_i, bnzi)); 4819 4820 /* add local non-zero vals of this proc's seqmat into ba */ 4821 anzi = ai[arow + 1] - ai[arow]; 4822 aj = a->j + ai[arow]; 4823 aa = a_a + ai[arow]; 4824 nextaj = 0; 4825 for (j = 0; nextaj < anzi; j++) { 4826 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4827 ba_i[j] += aa[nextaj++]; 4828 } 4829 } 4830 4831 /* add received vals into ba */ 4832 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4833 /* i-th row */ 4834 if (i == *nextrow[k]) { 4835 anzi = *(nextai[k] + 1) - *nextai[k]; 4836 aj = buf_rj[k] + *(nextai[k]); 4837 aa = abuf_r[k] + *(nextai[k]); 4838 nextaj = 0; 4839 for (j = 0; nextaj < anzi; j++) { 4840 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4841 ba_i[j] += aa[nextaj++]; 4842 } 4843 } 4844 nextrow[k]++; 4845 nextai[k]++; 
4846 } 4847 } 4848 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4849 } 4850 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4851 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4852 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4853 4854 PetscCall(PetscFree(abuf_r[0])); 4855 PetscCall(PetscFree(abuf_r)); 4856 PetscCall(PetscFree(ba_i)); 4857 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4858 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4859 PetscFunctionReturn(PETSC_SUCCESS); 4860 } 4861 4862 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4863 { 4864 Mat B_mpi; 4865 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4866 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4867 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4868 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4869 PetscInt len, proc, *dnz, *onz, bs, cbs; 4870 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4871 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4872 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4873 MPI_Status *status; 4874 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4875 PetscBT lnkbt; 4876 Mat_Merge_SeqsToMPI *merge; 4877 PetscContainer container; 4878 4879 PetscFunctionBegin; 4880 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4881 4882 /* make sure it is a PETSc comm */ 4883 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4884 PetscCallMPI(MPI_Comm_size(comm, &size)); 4885 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4886 4887 PetscCall(PetscNew(&merge)); 4888 PetscCall(PetscMalloc1(size, &status)); 4889 4890 /* determine row ownership */ 4891 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4892 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4893 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4894 
PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4895 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4896 PetscCall(PetscMalloc1(size, &len_si)); 4897 PetscCall(PetscMalloc1(size, &merge->len_s)); 4898 4899 m = merge->rowmap->n; 4900 owners = merge->rowmap->range; 4901 4902 /* determine the number of messages to send, their lengths */ 4903 len_s = merge->len_s; 4904 4905 len = 0; /* length of buf_si[] */ 4906 merge->nsend = 0; 4907 for (proc = 0; proc < size; proc++) { 4908 len_si[proc] = 0; 4909 if (proc == rank) { 4910 len_s[proc] = 0; 4911 } else { 4912 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4913 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4914 } 4915 if (len_s[proc]) { 4916 merge->nsend++; 4917 nrows = 0; 4918 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4919 if (ai[i + 1] > ai[i]) nrows++; 4920 } 4921 len_si[proc] = 2 * (nrows + 1); 4922 len += len_si[proc]; 4923 } 4924 } 4925 4926 /* determine the number and length of messages to receive for ij-structure */ 4927 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4928 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4929 4930 /* post the Irecv of j-structure */ 4931 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4932 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4933 4934 /* post the Isend of j-structure */ 4935 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4936 4937 for (proc = 0, k = 0; proc < size; proc++) { 4938 if (!len_s[proc]) continue; 4939 i = owners[proc]; 4940 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4941 k++; 4942 } 4943 4944 /* receives and sends of j-structure are complete */ 4945 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4946 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4947 4948 /* send and recv i-structure */ 4949 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4950 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4951 4952 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4953 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4954 for (proc = 0, k = 0; proc < size; proc++) { 4955 if (!len_s[proc]) continue; 4956 /* form outgoing message for i-structure: 4957 buf_si[0]: nrows to be sent 4958 [1:nrows]: row index (local to [proc], i.e. i - owners[proc]) 4959 [nrows+1:2*nrows+1]: i-structure index 4960 */ 4961 nrows = len_si[proc] / 2 - 1; 4962 buf_si_i = buf_si + nrows + 1; 4963 buf_si[0] = nrows; 4964 buf_si_i[0] = 0; 4965 nrows = 0; 4966 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4967 anzi = ai[i + 1] - ai[i]; 4968 if (anzi) { 4969 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4970 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4971 nrows++; 4972 } 4973 } 4974 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4975 k++; 4976 buf_si += len_si[proc]; 4977 } 4978 4979 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4980 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4981 4982 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4983 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4984 4985 PetscCall(PetscFree(len_si)); 4986 PetscCall(PetscFree(len_ri)); 4987 PetscCall(PetscFree(rj_waits)); 4988 PetscCall(PetscFree2(si_waits, sj_waits)); 4989 PetscCall(PetscFree(ri_waits)); 4990 PetscCall(PetscFree(buf_s)); 4991 PetscCall(PetscFree(status)); 4992 4993 /* compute a local seq matrix in each processor */ 4994 /* allocate bi array and free space for accumulating nonzero column info */ 4995 
PetscCall(PetscMalloc1(m + 1, &bi)); 4996 bi[0] = 0; 4997 4998 /* create and initialize a linked list */ 4999 nlnk = N + 1; 5000 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 5001 5002 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5003 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5004 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5005 5006 current_space = free_space; 5007 5008 /* determine symbolic info for each local row */ 5009 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5010 5011 for (k = 0; k < merge->nrecv; k++) { 5012 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5013 nrows = *buf_ri_k[k]; 5014 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5015 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5016 } 5017 5018 MatPreallocateBegin(comm, m, n, dnz, onz); 5019 len = 0; 5020 for (i = 0; i < m; i++) { 5021 bnzi = 0; 5022 /* add local non-zero cols of this proc's seqmat into lnk */ 5023 arow = owners[rank] + i; 5024 anzi = ai[arow + 1] - ai[arow]; 5025 aj = a->j + ai[arow]; 5026 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5027 bnzi += nlnk; 5028 /* add received col data into lnk */ 5029 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5030 if (i == *nextrow[k]) { /* i-th row */ 5031 anzi = *(nextai[k] + 1) - *nextai[k]; 5032 aj = buf_rj[k] + *nextai[k]; 5033 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5034 bnzi += nlnk; 5035 nextrow[k]++; 5036 nextai[k]++; 5037 } 5038 } 5039 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5040 5041 /* if free space is not available, make more free space; pass the address of current_space, as the initial PetscFreeSpaceGet() call does with free_space */ 5042 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5043 /* copy data into free space, then initialize lnk */ 5044 
PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5045 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5046 5047 current_space->array += bnzi; 5048 current_space->local_used += bnzi; 5049 current_space->local_remaining -= bnzi; 5050 5051 bi[i + 1] = bi[i] + bnzi; 5052 } 5053 5054 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5055 5056 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5057 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5058 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5059 5060 /* create symbolic parallel matrix B_mpi */ 5061 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5062 PetscCall(MatCreate(comm, &B_mpi)); 5063 if (n == PETSC_DECIDE) { 5064 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5065 } else { 5066 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5067 } 5068 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5069 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5070 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5071 MatPreallocateEnd(dnz, onz); 5072 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5073 5074 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5075 B_mpi->assembled = PETSC_FALSE; 5076 merge->bi = bi; 5077 merge->bj = bj; 5078 merge->buf_ri = buf_ri; 5079 merge->buf_rj = buf_rj; 5080 merge->coi = NULL; 5081 merge->coj = NULL; 5082 merge->owners_co = NULL; 5083 5084 PetscCall(PetscCommDestroy(&comm)); 5085 5086 /* attach the supporting struct to B_mpi for reuse */ 5087 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5088 PetscCall(PetscContainerSetPointer(container, merge)); 5089 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5090 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5091 PetscCall(PetscContainerDestroy(&container)); 5092 *mpimat = B_mpi; 5093 5094 
PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5095 PetscFunctionReturn(PETSC_SUCCESS); 5096 } 5097 5098 /*@C 5099 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5100 matrices from each processor 5101 5102 Collective 5103 5104 Input Parameters: 5105 + comm - the communicators the parallel matrix will live on 5106 . seqmat - the input sequential matrices 5107 . m - number of local rows (or `PETSC_DECIDE`) 5108 . n - number of local columns (or `PETSC_DECIDE`) 5109 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5110 5111 Output Parameter: 5112 . mpimat - the parallel matrix generated 5113 5114 Level: advanced 5115 5116 Note: 5117 The dimensions of the sequential matrix in each processor MUST be the same. 5118 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5119 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 5120 5121 .seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()` 5122 @*/ 5123 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5124 { 5125 PetscMPIInt size; 5126 5127 PetscFunctionBegin; 5128 PetscCallMPI(MPI_Comm_size(comm, &size)); 5129 if (size == 1) { 5130 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5131 if (scall == MAT_INITIAL_MATRIX) { 5132 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5133 } else { 5134 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5135 } 5136 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5137 PetscFunctionReturn(PETSC_SUCCESS); 5138 } 5139 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5140 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5141 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5142 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5143 PetscFunctionReturn(PETSC_SUCCESS); 
5144 } 5145 5146 /*@ 5147 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with 5148 mlocal rows and n columns. Where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained 5149 with `MatGetSize()` 5150 5151 Not Collective 5152 5153 Input Parameter: 5154 . A - the matrix 5155 5156 Output Parameter: 5157 . A_loc - the local sequential matrix generated 5158 5159 Level: developer 5160 5161 Notes: 5162 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5163 5164 Destroy the matrix with `MatDestroy()` 5165 5166 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5167 @*/ 5168 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5169 { 5170 PetscBool mpi; 5171 5172 PetscFunctionBegin; 5173 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5174 if (mpi) { 5175 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5176 } else { 5177 *A_loc = A; 5178 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5179 } 5180 PetscFunctionReturn(PETSC_SUCCESS); 5181 } 5182 5183 /*@ 5184 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5185 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5186 with `MatGetSize()` 5187 5188 Not Collective 5189 5190 Input Parameters: 5191 + A - the matrix 5192 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5193 5194 Output Parameter: 5195 . A_loc - the local sequential matrix generated 5196 5197 Level: developer 5198 5199 Notes: 5200 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5201 5202 When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`. 5203 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called. 5204 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5205 modify the values of the returned `A_loc`. 5206 5207 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5208 @*/ 5209 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5210 { 5211 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5212 Mat_SeqAIJ *mat, *a, *b; 5213 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5214 const PetscScalar *aa, *ba, *aav, *bav; 5215 PetscScalar *ca, *cam; 5216 PetscMPIInt size; 5217 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5218 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5219 PetscBool match; 5220 5221 PetscFunctionBegin; 5222 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5223 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5224 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5225 if (size == 1) { 5226 if (scall == MAT_INITIAL_MATRIX) { 5227 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5228 *A_loc = mpimat->A; 5229 } else if (scall == MAT_REUSE_MATRIX) { 5230 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5231 } 5232 PetscFunctionReturn(PETSC_SUCCESS); 5233 } 5234 5235 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5236 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5237 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5238 ai = a->i; 5239 aj = a->j; 5240 bi = b->i; 5241 bj = b->j; 5242 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5243 
PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5244 aa = aav; 5245 ba = bav; 5246 if (scall == MAT_INITIAL_MATRIX) { 5247 PetscCall(PetscMalloc1(1 + am, &ci)); 5248 ci[0] = 0; 5249 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5250 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5251 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5252 k = 0; 5253 for (i = 0; i < am; i++) { 5254 ncols_o = bi[i + 1] - bi[i]; 5255 ncols_d = ai[i + 1] - ai[i]; 5256 /* off-diagonal portion of A */ 5257 for (jo = 0; jo < ncols_o; jo++) { 5258 col = cmap[*bj]; 5259 if (col >= cstart) break; 5260 cj[k] = col; 5261 bj++; 5262 ca[k++] = *ba++; 5263 } 5264 /* diagonal portion of A */ 5265 for (j = 0; j < ncols_d; j++) { 5266 cj[k] = cstart + *aj++; 5267 ca[k++] = *aa++; 5268 } 5269 /* off-diagonal portion of A */ 5270 for (j = jo; j < ncols_o; j++) { 5271 cj[k] = cmap[*bj++]; 5272 ca[k++] = *ba++; 5273 } 5274 } 5275 /* put together the new matrix */ 5276 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5277 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5278 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5279 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5280 mat->free_a = PETSC_TRUE; 5281 mat->free_ij = PETSC_TRUE; 5282 mat->nonew = 0; 5283 } else if (scall == MAT_REUSE_MATRIX) { 5284 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5285 ci = mat->i; 5286 cj = mat->j; 5287 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5288 for (i = 0; i < am; i++) { 5289 /* off-diagonal portion of A */ 5290 ncols_o = bi[i + 1] - bi[i]; 5291 for (jo = 0; jo < ncols_o; jo++) { 5292 col = cmap[*bj]; 5293 if (col >= cstart) break; 5294 *cam++ = *ba++; 5295 bj++; 5296 } 5297 /* diagonal portion of A */ 5298 ncols_d = ai[i + 1] - ai[i]; 5299 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5300 /* off-diagonal portion of A */ 5301 for (j = jo; j < ncols_o; j++) { 5302 *cam++ = *ba++; 5303 bj++; 5304 } 5305 } 5306 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5307 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5308 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5309 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5310 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5311 PetscFunctionReturn(PETSC_SUCCESS); 5312 } 5313 5314 /*@ 5315 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5316 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5317 5318 Not Collective 5319 5320 Input Parameters: 5321 + A - the matrix 5322 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5323 5324 Output Parameters: 5325 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5326 - A_loc - the local sequential matrix generated 5327 5328 Level: developer 5329 5330 Note: 5331 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5332 part, then those associated with the off diagonal part (in its local ordering) 5333 5334 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5335 @*/ 5336 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5337 { 5338 Mat Ao, Ad; 5339 const PetscInt *cmap; 5340 PetscMPIInt size; 5341 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5342 5343 PetscFunctionBegin; 5344 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5345 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5346 if (size == 1) { 5347 if (scall == MAT_INITIAL_MATRIX) { 5348 PetscCall(PetscObjectReference((PetscObject)Ad)); 5349 *A_loc = Ad; 5350 } else if (scall == MAT_REUSE_MATRIX) { 5351 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5352 } 5353 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5354 PetscFunctionReturn(PETSC_SUCCESS); 5355 } 5356 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5357 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5358 if (f) { 5359 PetscCall((*f)(A, scall, glob, A_loc)); 5360 } else { 5361 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5362 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5363 Mat_SeqAIJ *c; 5364 
PetscInt *ai = a->i, *aj = a->j; 5365 PetscInt *bi = b->i, *bj = b->j; 5366 PetscInt *ci, *cj; 5367 const PetscScalar *aa, *ba; 5368 PetscScalar *ca; 5369 PetscInt i, j, am, dn, on; 5370 5371 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5372 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5373 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5374 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5375 if (scall == MAT_INITIAL_MATRIX) { 5376 PetscInt k; 5377 PetscCall(PetscMalloc1(1 + am, &ci)); 5378 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5379 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5380 ci[0] = 0; 5381 for (i = 0, k = 0; i < am; i++) { 5382 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5383 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5384 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5385 /* diagonal portion of A */ 5386 for (j = 0; j < ncols_d; j++, k++) { 5387 cj[k] = *aj++; 5388 ca[k] = *aa++; 5389 } 5390 /* off-diagonal portion of A */ 5391 for (j = 0; j < ncols_o; j++, k++) { 5392 cj[k] = dn + *bj++; 5393 ca[k] = *ba++; 5394 } 5395 } 5396 /* put together the new matrix */ 5397 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5398 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5399 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5400 c = (Mat_SeqAIJ *)(*A_loc)->data; 5401 c->free_a = PETSC_TRUE; 5402 c->free_ij = PETSC_TRUE; 5403 c->nonew = 0; 5404 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5405 } else if (scall == MAT_REUSE_MATRIX) { 5406 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5407 for (i = 0; i < am; i++) { 5408 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5409 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5410 /* diagonal portion of A */ 5411 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5412 /* off-diagonal portion of A */ 5413 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5414 } 5415 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5416 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5417 /* restore each read-only array with the pointer it was obtained into: aa came from Ad, ba came from Ao */ PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5418 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba)); 5419 if (glob) { 5420 PetscInt cst, *gidx; 5421 5422 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5423 PetscCall(PetscMalloc1(dn + on, &gidx)); 5424 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5425 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5426 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5427 } 5428 } 5429 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5430 PetscFunctionReturn(PETSC_SUCCESS); 5431 } 5432 5433 /*@C 5434 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5435 5436 Not Collective 5437 5438 Input Parameters: 5439 + A - the matrix 5440 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5441 . row - index set of rows to extract (or `NULL`) 5442 - col - index set of columns to extract (or `NULL`) 5443 5444 Output Parameter: 5445 . 
A_loc - the local sequential matrix generated 5446 5447 Level: developer 5448 5449 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5450 @*/ 5451 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5452 { 5453 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5454 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5455 IS isrowa, iscola; 5456 Mat *aloc; 5457 PetscBool match; 5458 5459 PetscFunctionBegin; 5460 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5461 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5462 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5463 if (!row) { 5464 start = A->rmap->rstart; 5465 end = A->rmap->rend; 5466 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5467 } else { 5468 isrowa = *row; 5469 } 5470 if (!col) { 5471 start = A->cmap->rstart; 5472 cmap = a->garray; 5473 nzA = a->A->cmap->n; 5474 nzB = a->B->cmap->n; 5475 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5476 ncols = 0; 5477 for (i = 0; i < nzB; i++) { 5478 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5479 else break; 5480 } 5481 imark = i; 5482 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5483 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5484 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5485 } else { 5486 iscola = *col; 5487 } 5488 if (scall != MAT_INITIAL_MATRIX) { 5489 PetscCall(PetscMalloc1(1, &aloc)); 5490 aloc[0] = *A_loc; 5491 } 5492 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5493 if (!col) { /* attach global id of condensed columns */ 5494 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5495 } 5496 *A_loc = aloc[0]; 5497 PetscCall(PetscFree(aloc)); 5498 if (!row) PetscCall(ISDestroy(&isrowa)); 5499 if 
(!col) PetscCall(ISDestroy(&iscola)); 5500 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5501 PetscFunctionReturn(PETSC_SUCCESS); 5502 } 5503 5504 /* 5505 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5506 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5507 * on a global size. 5508 * */ 5509 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5510 { 5511 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5512 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5513 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5514 PetscMPIInt owner; 5515 PetscSFNode *iremote, *oiremote; 5516 const PetscInt *lrowindices; 5517 PetscSF sf, osf; 5518 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5519 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5520 MPI_Comm comm; 5521 ISLocalToGlobalMapping mapping; 5522 const PetscScalar *pd_a, *po_a; 5523 5524 PetscFunctionBegin; 5525 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5526 /* plocalsize is the number of roots 5527 * nrows is the number of leaves 5528 * */ 5529 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5530 PetscCall(ISGetLocalSize(rows, &nrows)); 5531 PetscCall(PetscCalloc1(nrows, &iremote)); 5532 PetscCall(ISGetIndices(rows, &lrowindices)); 5533 for (i = 0; i < nrows; i++) { 5534 /* Find a remote index and an owner for a row 5535 * The row could be local or remote 5536 * */ 5537 owner = 0; 5538 lidx = 0; 5539 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5540 iremote[i].index = lidx; 5541 iremote[i].rank = owner; 5542 } 5543 /* Create SF to communicate how many nonzero columns for each row */ 5544 PetscCall(PetscSFCreate(comm, &sf)); 5545 /* SF will figure out the number of nonzero colunms for each row, and their 5546 * offsets 5547 * */ 5548 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5549 PetscCall(PetscSFSetFromOptions(sf)); 5550 PetscCall(PetscSFSetUp(sf)); 5551 5552 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5553 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5554 PetscCall(PetscCalloc1(nrows, &pnnz)); 5555 roffsets[0] = 0; 5556 roffsets[1] = 0; 5557 for (i = 0; i < plocalsize; i++) { 5558 /* diag */ 5559 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5560 /* off diag */ 5561 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5562 /* compute offsets so that we relative location for each row */ 5563 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5564 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5565 } 5566 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5567 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5568 /* 'r' means root, and 'l' means leaf */ 5569 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5570 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5571 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5572 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5573 PetscCall(PetscSFDestroy(&sf)); 5574 PetscCall(PetscFree(roffsets)); 5575 PetscCall(PetscFree(nrcols)); 5576 dntotalcols = 0; 5577 ontotalcols = 0; 5578 ncol = 0; 5579 for (i = 0; i < nrows; i++) { 5580 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5581 ncol = PetscMax(pnnz[i], ncol); 5582 /* diag */ 5583 dntotalcols += nlcols[i * 2 + 0]; 5584 /* off diag */ 5585 ontotalcols += nlcols[i * 2 + 1]; 5586 } 5587 /* We do not need to figure the right number of columns 5588 * since all the calculations will be done by going through the raw data 5589 * */ 5590 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5591 PetscCall(MatSetUp(*P_oth)); 5592 PetscCall(PetscFree(pnnz)); 5593 p_oth = (Mat_SeqAIJ 
*)(*P_oth)->data; 5594 /* diag */ 5595 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5596 /* off diag */ 5597 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5598 /* diag */ 5599 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5600 /* off diag */ 5601 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5602 dntotalcols = 0; 5603 ontotalcols = 0; 5604 ntotalcols = 0; 5605 for (i = 0; i < nrows; i++) { 5606 owner = 0; 5607 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5608 /* Set iremote for diag matrix */ 5609 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5610 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5611 iremote[dntotalcols].rank = owner; 5612 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5613 ilocal[dntotalcols++] = ntotalcols++; 5614 } 5615 /* off diag */ 5616 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5617 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5618 oiremote[ontotalcols].rank = owner; 5619 oilocal[ontotalcols++] = ntotalcols++; 5620 } 5621 } 5622 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5623 PetscCall(PetscFree(loffsets)); 5624 PetscCall(PetscFree(nlcols)); 5625 PetscCall(PetscSFCreate(comm, &sf)); 5626 /* P serves as roots and P_oth is leaves 5627 * Diag matrix 5628 * */ 5629 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5630 PetscCall(PetscSFSetFromOptions(sf)); 5631 PetscCall(PetscSFSetUp(sf)); 5632 5633 PetscCall(PetscSFCreate(comm, &osf)); 5634 /* Off diag */ 5635 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5636 PetscCall(PetscSFSetFromOptions(osf)); 5637 PetscCall(PetscSFSetUp(osf)); 5638 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5639 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5640 /* We operate on the matrix internal data for saving memory */ 5641 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, 
p_oth->a, MPI_REPLACE)); 5642 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5643 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5644 /* Convert to global indices for diag matrix */ 5645 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5646 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5647 /* We want P_oth store global indices */ 5648 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5649 /* Use memory scalable approach */ 5650 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5651 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5652 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5653 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5654 /* Convert back to local indices */ 5655 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5656 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5657 nout = 0; 5658 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5659 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5660 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5661 /* Exchange values */ 5662 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5663 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5664 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5665 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5666 /* Stop PETSc from shrinking memory */ 5667 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5668 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5669 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5670 /* Attach PetscSF objects to P_oth so that 
we can reuse it later */ 5671 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5672 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5673 PetscCall(PetscSFDestroy(&sf)); 5674 PetscCall(PetscSFDestroy(&osf)); 5675 PetscFunctionReturn(PETSC_SUCCESS); 5676 } 5677 5678 /* 5679 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5680 * This supports MPIAIJ and MAIJ 5681 * */ 5682 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5683 { 5684 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5685 Mat_SeqAIJ *p_oth; 5686 IS rows, map; 5687 PetscHMapI hamp; 5688 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5689 MPI_Comm comm; 5690 PetscSF sf, osf; 5691 PetscBool has; 5692 5693 PetscFunctionBegin; 5694 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5695 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5696 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5697 * and then create a submatrix (that often is an overlapping matrix) 5698 * */ 5699 if (reuse == MAT_INITIAL_MATRIX) { 5700 /* Use a hash table to figure out unique keys */ 5701 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5702 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5703 count = 0; 5704 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5705 for (i = 0; i < a->B->cmap->n; i++) { 5706 key = a->garray[i] / dof; 5707 PetscCall(PetscHMapIHas(hamp, key, &has)); 5708 if (!has) { 5709 mapping[i] = count; 5710 PetscCall(PetscHMapISet(hamp, key, count++)); 5711 } else { 5712 /* Current 'i' has the same value the previous step */ 5713 mapping[i] = count - 1; 5714 } 5715 } 5716 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5717 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5718 PetscCheck(htsize == count, comm, 
PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5719 PetscCall(PetscCalloc1(htsize, &rowindices)); 5720 off = 0; 5721 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5722 PetscCall(PetscHMapIDestroy(&hamp)); 5723 PetscCall(PetscSortInt(htsize, rowindices)); 5724 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5725 /* In case, the matrix was already created but users want to recreate the matrix */ 5726 PetscCall(MatDestroy(P_oth)); 5727 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5728 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5729 PetscCall(ISDestroy(&map)); 5730 PetscCall(ISDestroy(&rows)); 5731 } else if (reuse == MAT_REUSE_MATRIX) { 5732 /* If matrix was already created, we simply update values using SF objects 5733 * that as attached to the matrix earlier. 5734 */ 5735 const PetscScalar *pd_a, *po_a; 5736 5737 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5738 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5739 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5740 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5741 /* Update values in place */ 5742 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5743 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5744 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5745 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5746 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5747 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5748 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5749 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5750 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5751 
PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5752 PetscFunctionReturn(PETSC_SUCCESS); 5753 } 5754 5755 /*@C 5756 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5757 5758 Collective 5759 5760 Input Parameters: 5761 + A - the first matrix in `MATMPIAIJ` format 5762 . B - the second matrix in `MATMPIAIJ` format 5763 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5764 5765 Output Parameters: 5766 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5767 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5768 - B_seq - the sequential matrix generated 5769 5770 Level: developer 5771 5772 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5773 @*/ 5774 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5775 { 5776 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5777 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5778 IS isrowb, iscolb; 5779 Mat *bseq = NULL; 5780 5781 PetscFunctionBegin; 5782 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5783 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5784 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5785 5786 if (scall == MAT_INITIAL_MATRIX) { 5787 start = A->cmap->rstart; 5788 cmap = a->garray; 5789 nzA = a->A->cmap->n; 5790 nzB = a->B->cmap->n; 5791 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5792 ncols = 0; 5793 for (i = 0; i < nzB; i++) { /* row < local row index */ 5794 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5795 else break; 5796 } 5797 imark = i; 5798 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5799 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index 
*/ 5800 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5801 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5802 } else { 5803 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5804 isrowb = *rowb; 5805 iscolb = *colb; 5806 PetscCall(PetscMalloc1(1, &bseq)); 5807 bseq[0] = *B_seq; 5808 } 5809 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5810 *B_seq = bseq[0]; 5811 PetscCall(PetscFree(bseq)); 5812 if (!rowb) { 5813 PetscCall(ISDestroy(&isrowb)); 5814 } else { 5815 *rowb = isrowb; 5816 } 5817 if (!colb) { 5818 PetscCall(ISDestroy(&iscolb)); 5819 } else { 5820 *colb = iscolb; 5821 } 5822 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5823 PetscFunctionReturn(PETSC_SUCCESS); 5824 } 5825 5826 /* 5827 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5828 of the OFF-DIAGONAL portion of local A 5829 5830 Collective 5831 5832 Input Parameters: 5833 + A,B - the matrices in `MATMPIAIJ` format 5834 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5835 5836 Output Parameter: 5837 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5838 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5839 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5840 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5841 5842 Developer Note: 5843 This directly accesses information inside the VecScatter associated with the matrix-vector product 5844 for this matrix. This is not desirable.. 
5845 5846 Level: developer 5847 5848 */ 5849 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5850 { 5851 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5852 Mat_SeqAIJ *b_oth; 5853 VecScatter ctx; 5854 MPI_Comm comm; 5855 const PetscMPIInt *rprocs, *sprocs; 5856 const PetscInt *srow, *rstarts, *sstarts; 5857 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5858 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5859 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5860 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5861 PetscMPIInt size, tag, rank, nreqs; 5862 5863 PetscFunctionBegin; 5864 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5865 PetscCallMPI(MPI_Comm_size(comm, &size)); 5866 5867 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5868 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5869 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5870 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5871 5872 if (size == 1) { 5873 startsj_s = NULL; 5874 bufa_ptr = NULL; 5875 *B_oth = NULL; 5876 PetscFunctionReturn(PETSC_SUCCESS); 5877 } 5878 5879 ctx = a->Mvctx; 5880 tag = ((PetscObject)ctx)->tag; 5881 5882 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5883 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5884 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5885 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5886 PetscCall(PetscMalloc1(nreqs, &reqs)); 5887 rwaits = reqs; 5888 swaits = reqs + nrecvs; 5889 5890 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5891 if (scall == MAT_INITIAL_MATRIX) { 5892 /* i-array */ 5893 /* post receives */ 5894 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5895 for (i = 0; i < nrecvs; i++) { 5896 rowlen = rvalues + rstarts[i] * rbs; 5897 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5898 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5899 } 5900 5901 /* pack the outgoing message */ 5902 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5903 5904 sstartsj[0] = 0; 5905 rstartsj[0] = 0; 5906 len = 0; /* total length of j or a array to be sent */ 5907 if (nsends) { 5908 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5909 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5910 } 5911 for (i = 0; i < nsends; i++) { 5912 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5913 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5914 for (j = 0; j < nrows; j++) { 5915 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5916 for (l = 0; l < sbs; l++) { 5917 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5918 5919 rowlen[j * sbs + l] = ncols; 5920 5921 len += ncols; 5922 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5923 } 5924 k++; 5925 } 5926 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5927 5928 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5929 } 5930 /* recvs and sends of i-array are completed */ 5931 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5932 PetscCall(PetscFree(svalues)); 
5933 5934 /* allocate buffers for sending j and a arrays */ 5935 PetscCall(PetscMalloc1(len + 1, &bufj)); 5936 PetscCall(PetscMalloc1(len + 1, &bufa)); 5937 5938 /* create i-array of B_oth */ 5939 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5940 5941 b_othi[0] = 0; 5942 len = 0; /* total length of j or a array to be received */ 5943 k = 0; 5944 for (i = 0; i < nrecvs; i++) { 5945 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5946 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5947 for (j = 0; j < nrows; j++) { 5948 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5949 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5950 k++; 5951 } 5952 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5953 } 5954 PetscCall(PetscFree(rvalues)); 5955 5956 /* allocate space for j and a arrays of B_oth */ 5957 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5958 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5959 5960 /* j-array */ 5961 /* post receives of j-array */ 5962 for (i = 0; i < nrecvs; i++) { 5963 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5964 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5965 } 5966 5967 /* pack the outgoing message j-array */ 5968 if (nsends) k = sstarts[0]; 5969 for (i = 0; i < nsends; i++) { 5970 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5971 bufJ = bufj + sstartsj[i]; 5972 for (j = 0; j < nrows; j++) { 5973 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5974 for (ll = 0; ll < sbs; ll++) { 5975 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5976 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5977 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5978 } 5979 } 5980 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5981 } 5982 5983 /* recvs and sends of j-array are 
completed */ 5984 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5985 } else if (scall == MAT_REUSE_MATRIX) { 5986 sstartsj = *startsj_s; 5987 rstartsj = *startsj_r; 5988 bufa = *bufa_ptr; 5989 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5990 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5991 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5992 5993 /* a-array */ 5994 /* post receives of a-array */ 5995 for (i = 0; i < nrecvs; i++) { 5996 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5997 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5998 } 5999 6000 /* pack the outgoing message a-array */ 6001 if (nsends) k = sstarts[0]; 6002 for (i = 0; i < nsends; i++) { 6003 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6004 bufA = bufa + sstartsj[i]; 6005 for (j = 0; j < nrows; j++) { 6006 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6007 for (ll = 0; ll < sbs; ll++) { 6008 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6009 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6010 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6011 } 6012 } 6013 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6014 } 6015 /* recvs and sends of a-array are completed */ 6016 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6017 PetscCall(PetscFree(reqs)); 6018 6019 if (scall == MAT_INITIAL_MATRIX) { 6020 /* put together the new matrix */ 6021 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6022 6023 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6024 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6025 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6026 b_oth->free_a = PETSC_TRUE; 6027 b_oth->free_ij = PETSC_TRUE; 6028 b_oth->nonew = 0; 6029 6030 PetscCall(PetscFree(bufj)); 6031 if (!startsj_s || !bufa_ptr) { 6032 PetscCall(PetscFree2(sstartsj, rstartsj)); 6033 PetscCall(PetscFree(bufa_ptr)); 6034 } else { 6035 *startsj_s = sstartsj; 6036 *startsj_r = rstartsj; 6037 *bufa_ptr = bufa; 6038 } 6039 } else if (scall == MAT_REUSE_MATRIX) { 6040 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6041 } 6042 6043 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6044 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6045 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6046 PetscFunctionReturn(PETSC_SUCCESS); 6047 } 6048 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6052 #if defined(PETSC_HAVE_MKL_SPARSE) 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6054 #endif 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6057 #if defined(PETSC_HAVE_ELEMENTAL) 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6059 #endif 6060 #if defined(PETSC_HAVE_SCALAPACK) 6061 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 #if defined(PETSC_HAVE_HYPRE) 6064 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6065 #endif 6066 #if defined(PETSC_HAVE_CUDA) 6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6068 #endif 6069 #if defined(PETSC_HAVE_HIP) 6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6071 #endif 6072 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6073 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6074 #endif 6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6076 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6077 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6078 6079 /* 6080 Computes (B'*A')' since computing B*A directly is untenable 6081 6082 n p p 6083 [ ] [ ] [ ] 6084 m [ A ] * n [ B ] = m [ C ] 6085 [ ] [ ] [ ] 6086 6087 */ 6088 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6089 { 6090 Mat At, Bt, Ct; 6091 6092 PetscFunctionBegin; 6093 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6094 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6095 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6096 PetscCall(MatDestroy(&At)); 6097 PetscCall(MatDestroy(&Bt)); 6098 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6099 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6100 PetscCall(MatDestroy(&Ct)); 6101 PetscFunctionReturn(PETSC_SUCCESS); 6102 } 6103 6104 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6105 { 6106 PetscBool cisdense; 6107 6108 PetscFunctionBegin; 6109 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6110 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6111 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6112 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6113 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6114 
PetscCall(MatSetUp(C)); 6115 6116 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6117 PetscFunctionReturn(PETSC_SUCCESS); 6118 } 6119 6120 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6121 { 6122 Mat_Product *product = C->product; 6123 Mat A = product->A, B = product->B; 6124 6125 PetscFunctionBegin; 6126 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6127 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6128 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6129 C->ops->productsymbolic = MatProductSymbolic_AB; 6130 PetscFunctionReturn(PETSC_SUCCESS); 6131 } 6132 6133 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6134 { 6135 Mat_Product *product = C->product; 6136 6137 PetscFunctionBegin; 6138 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6139 PetscFunctionReturn(PETSC_SUCCESS); 6140 } 6141 6142 /* 6143 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6144 6145 Input Parameters: 6146 6147 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6148 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6149 6150 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6151 6152 For Set1, j1[] contains column indices of the nonzeros. 6153 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6154 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6155 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6156 6157 Similar for Set2. 
6158 6159 This routine merges the two sets of nonzeros row by row and removes repeats. 6160 6161 Output Parameters: (memory is allocated by the caller) 6162 6163 i[],j[]: the CSR of the merged matrix, which has m rows. 6164 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6165 imap2[]: similar to imap1[], but for Set2. 6166 Note we order nonzeros row-by-row and from left to right. 6167 */ 6168 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6169 { 6170 PetscInt r, m; /* Row index of mat */ 6171 PetscCount t, t1, t2, b1, e1, b2, e2; 6172 6173 PetscFunctionBegin; 6174 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6175 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6176 i[0] = 0; 6177 for (r = 0; r < m; r++) { /* Do row by row merging */ 6178 b1 = rowBegin1[r]; 6179 e1 = rowEnd1[r]; 6180 b2 = rowBegin2[r]; 6181 e2 = rowEnd2[r]; 6182 while (b1 < e1 && b2 < e2) { 6183 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6184 j[t] = j1[b1]; 6185 imap1[t1] = t; 6186 imap2[t2] = t; 6187 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6188 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6189 t1++; 6190 t2++; 6191 t++; 6192 } else if (j1[b1] < j2[b2]) { 6193 j[t] = j1[b1]; 6194 imap1[t1] = t; 6195 b1 += jmap1[t1 + 1] - jmap1[t1]; 6196 t1++; 6197 t++; 6198 } else { 6199 j[t] = j2[b2]; 6200 imap2[t2] = t; 6201 b2 += jmap2[t2 + 1] - jmap2[t2]; 6202 t2++; 6203 t++; 6204 } 6205 } 6206 /* Merge the remaining in either j1[] or j2[] */ 6207 while (b1 < e1) { 6208 j[t] = j1[b1]; 6209 imap1[t1] = t; 6210 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6211 t1++; 6212 t++; 6213 } 6214 while (b2 < e2) { 6215 j[t] = j2[b2]; 6216 imap2[t2] = t; 6217 b2 += jmap2[t2 + 1] - jmap2[t2]; 6218 t2++; 6219 t++; 6220 } 6221 i[r + 1] = t; 6222 } 6223 PetscFunctionReturn(PETSC_SUCCESS); 6224 } 6225 6226 /* 6227 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6228 6229 Input Parameters: 6230 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6231 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6232 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6233 6234 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6235 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6236 6237 Output Parameters: 6238 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6239 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6240 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6241 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6242 6243 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6244 Atot: number of entries belonging to the diagonal block. 6245 Annz: number of unique nonzeros belonging to the diagonal block. 6246 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6247 repeats (i.e., same 'i,j' pair). 6248 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): this accepts a column index equal to mat->cmap->N; if valid global columns are 0..N-1 the
         bound should presumably be '<' -- confirm before tightening */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort the row's columns (carrying perm[] along); the shifted, now negative, diag columns end up first */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p]; /* still the shifted value; compared against the not-yet-reverted entries that follow */
      do {
        j[p] += PETSC_MAX_INT;
        p++;
      } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* continue with the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  /* The four counters are reset and rebuilt during this second pass, now serving as write cursors */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    /* Rows that received no entries keep the zeros set by the caller, so k == mid == s and nothing is copied */
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p; /* first occurrence of this column */
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* prefix sum of repeat counts */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
   then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz;                /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    /* every merged slot after imap[k], down to the current p, gets the end offset of set entry k */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* remaining leading slots (at or before the first mapped entry) get jmap[0] */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6402 { 6403 MPI_Comm comm; 6404 PetscMPIInt rank, size; 6405 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6406 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6407 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6408 6409 PetscFunctionBegin; 6410 PetscCall(PetscFree(mpiaij->garray)); 6411 PetscCall(VecDestroy(&mpiaij->lvec)); 6412 #if defined(PETSC_USE_CTABLE) 6413 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6414 #else 6415 PetscCall(PetscFree(mpiaij->colmap)); 6416 #endif 6417 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6418 mat->assembled = PETSC_FALSE; 6419 mat->was_assembled = PETSC_FALSE; 6420 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6421 6422 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6423 PetscCallMPI(MPI_Comm_size(comm, &size)); 6424 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6425 PetscCall(PetscLayoutSetUp(mat->rmap)); 6426 PetscCall(PetscLayoutSetUp(mat->cmap)); 6427 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6428 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6429 PetscCall(MatGetLocalSize(mat, &m, &n)); 6430 PetscCall(MatGetSize(mat, &M, &N)); 6431 6432 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6433 /* entries come first, then local rows, then remote rows. */ 6434 PetscCount n1 = coo_n, *perm1; 6435 PetscInt *i1 = coo_i, *j1 = coo_j; 6436 6437 PetscCall(PetscMalloc1(n1, &perm1)); 6438 for (k = 0; k < n1; k++) perm1[k] = k; 6439 6440 /* Manipulate indices so that entries with negative row or col indices will have smallest 6441 row indices, local entries will have greater but negative row indices, and remote entries 6442 will have positive row indices. 
6443 */ 6444 for (k = 0; k < n1; k++) { 6445 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6446 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6447 else { 6448 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6449 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6450 } 6451 } 6452 6453 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6454 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6455 for (k = 0; k < n1; k++) { 6456 if (i1[k] > PETSC_MIN_INT) break; 6457 } /* Advance k to the first entry we need to take care of */ 6458 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6459 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6460 6461 /* Split local rows into diag/offdiag portions */ 6462 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6463 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6464 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6465 6466 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6467 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6468 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6469 6470 /* Send remote rows to their owner */ 6471 /* Find which rows should be sent to which remote ranks*/ 6472 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6473 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6474 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and 
error if not */ 6475 const PetscInt *ranges; 6476 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6477 6478 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6479 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6480 for (k = rem; k < n1;) { 6481 PetscMPIInt owner; 6482 PetscInt firstRow, lastRow; 6483 6484 /* Locate a row range */ 6485 firstRow = i1[k]; /* first row of this owner */ 6486 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6487 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6488 6489 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6490 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6491 6492 /* All entries in [k,p) belong to this remote owner */ 6493 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6494 PetscMPIInt *sendto2; 6495 PetscInt *nentries2; 6496 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6497 6498 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6499 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6500 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6501 PetscCall(PetscFree2(sendto, nentries2)); 6502 sendto = sendto2; 6503 nentries = nentries2; 6504 maxNsend = maxNsend2; 6505 } 6506 sendto[nsend] = owner; 6507 nentries[nsend] = p - k; 6508 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6509 nsend++; 6510 k = p; 6511 } 6512 6513 /* Build 1st SF to know offsets on remote to send data */ 6514 PetscSF sf1; 6515 PetscInt nroots = 1, nroots2 = 0; 6516 PetscInt nleaves = nsend, nleaves2 = 0; 6517 PetscInt *offsets; 6518 PetscSFNode *iremote; 6519 6520 PetscCall(PetscSFCreate(comm, &sf1)); 6521 PetscCall(PetscMalloc1(nsend, &iremote)); 6522 PetscCall(PetscMalloc1(nsend, &offsets)); 6523 for (k = 0; k < nsend; k++) { 6524 iremote[k].rank = sendto[k]; 6525 iremote[k].index = 0; 6526 nleaves2 += nentries[k]; 6527 
PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6528 } 6529 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6530 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6531 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6532 PetscCall(PetscSFDestroy(&sf1)); 6533 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6534 6535 /* Build 2nd SF to send remote COOs to their owner */ 6536 PetscSF sf2; 6537 nroots = nroots2; 6538 nleaves = nleaves2; 6539 PetscCall(PetscSFCreate(comm, &sf2)); 6540 PetscCall(PetscSFSetFromOptions(sf2)); 6541 PetscCall(PetscMalloc1(nleaves, &iremote)); 6542 p = 0; 6543 for (k = 0; k < nsend; k++) { 6544 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6545 for (q = 0; q < nentries[k]; q++, p++) { 6546 iremote[p].rank = sendto[k]; 6547 iremote[p].index = offsets[k] + q; 6548 } 6549 } 6550 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6551 6552 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6553 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6554 6555 /* Send the remote COOs to their owner */ 6556 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6557 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6558 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6559 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6562 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6563 6564 PetscCall(PetscFree(offsets)); 6565 PetscCall(PetscFree2(sendto, nentries)); 6566 6567 /* Sort received COOs by row along with the permutation array */ 6568 for (k = 0; k < n2; k++) perm2[k] = k; 6569 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6570 6571 /* Split received COOs into diag/offdiag portions */ 6572 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6573 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6574 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6575 6576 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6577 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6578 6579 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6580 PetscInt *Ai, *Bi; 6581 PetscInt *Aj, *Bj; 6582 6583 PetscCall(PetscMalloc1(m + 1, &Ai)); 6584 PetscCall(PetscMalloc1(m + 1, &Bi)); 6585 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 
6586 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6587 6588 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6589 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6590 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6591 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6592 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6593 6594 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6595 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6596 6597 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6598 /* expect nonzeros in A/B most likely have local contributing entries */ 6599 PetscInt Annz = Ai[m]; 6600 PetscInt Bnnz = Bi[m]; 6601 PetscCount *Ajmap1_new, *Bjmap1_new; 6602 6603 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6604 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6605 6606 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6607 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6608 6609 PetscCall(PetscFree(Aimap1)); 6610 PetscCall(PetscFree(Ajmap1)); 6611 PetscCall(PetscFree(Bimap1)); 6612 PetscCall(PetscFree(Bjmap1)); 6613 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6614 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6615 PetscCall(PetscFree(perm1)); 6616 PetscCall(PetscFree3(i2, j2, perm2)); 6617 6618 Ajmap1 = Ajmap1_new; 6619 Bjmap1 = Bjmap1_new; 6620 6621 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6622 if (Annz < Annz1 + Annz2) { 6623 PetscInt *Aj_new; 6624 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6625 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6626 PetscCall(PetscFree(Aj)); 6627 Aj = Aj_new; 6628 } 6629 6630 if (Bnnz < Bnnz1 + Bnnz2) { 6631 PetscInt *Bj_new; 6632 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6633 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6634 PetscCall(PetscFree(Bj)); 6635 Bj = Bj_new; 6636 } 
6637 6638 /* Create new submatrices for on-process and off-process coupling */ 6639 PetscScalar *Aa, *Ba; 6640 MatType rtype; 6641 Mat_SeqAIJ *a, *b; 6642 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6643 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6644 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6645 if (cstart) { 6646 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6647 } 6648 PetscCall(MatDestroy(&mpiaij->A)); 6649 PetscCall(MatDestroy(&mpiaij->B)); 6650 PetscCall(MatGetRootType_Private(mat, &rtype)); 6651 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6652 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6653 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6654 6655 a = (Mat_SeqAIJ *)mpiaij->A->data; 6656 b = (Mat_SeqAIJ *)mpiaij->B->data; 6657 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6658 a->free_a = b->free_a = PETSC_TRUE; 6659 a->free_ij = b->free_ij = PETSC_TRUE; 6660 6661 /* conversion must happen AFTER multiply setup */ 6662 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6663 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6664 PetscCall(VecDestroy(&mpiaij->lvec)); 6665 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6666 6667 mpiaij->coo_n = coo_n; 6668 mpiaij->coo_sf = sf2; 6669 mpiaij->sendlen = nleaves; 6670 mpiaij->recvlen = nroots; 6671 6672 mpiaij->Annz = Annz; 6673 mpiaij->Bnnz = Bnnz; 6674 6675 mpiaij->Annz2 = Annz2; 6676 mpiaij->Bnnz2 = Bnnz2; 6677 6678 mpiaij->Atot1 = Atot1; 6679 mpiaij->Atot2 = Atot2; 6680 mpiaij->Btot1 = Btot1; 6681 mpiaij->Btot2 = Btot2; 6682 6683 mpiaij->Ajmap1 = Ajmap1; 6684 mpiaij->Aperm1 = Aperm1; 6685 6686 mpiaij->Bjmap1 = Bjmap1; 6687 mpiaij->Bperm1 = Bperm1; 6688 6689 mpiaij->Aimap2 = Aimap2; 6690 mpiaij->Ajmap2 = Ajmap2; 6691 mpiaij->Aperm2 = Aperm2; 6692 6693 mpiaij->Bimap2 = Bimap2; 6694 
mpiaij->Bjmap2 = Bjmap2; 6695 mpiaij->Bperm2 = Bperm2; 6696 6697 mpiaij->Cperm1 = Cperm1; 6698 6699 /* Allocate in preallocation. If not used, it has zero cost on host */ 6700 PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf)); 6701 PetscFunctionReturn(PETSC_SUCCESS); 6702 } 6703 6704 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6705 { 6706 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6707 Mat A = mpiaij->A, B = mpiaij->B; 6708 PetscCount Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2; 6709 PetscScalar *Aa, *Ba; 6710 PetscScalar *sendbuf = mpiaij->sendbuf; 6711 PetscScalar *recvbuf = mpiaij->recvbuf; 6712 const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2; 6713 const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2; 6714 const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2; 6715 const PetscCount *Cperm1 = mpiaij->Cperm1; 6716 6717 PetscFunctionBegin; 6718 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6719 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6720 6721 /* Pack entries to be sent to remote */ 6722 for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6723 6724 /* Send remote entries to their owner and overlap the communication with local computation */ 6725 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6726 /* Add local entries to A and B */ 6727 for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6728 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6729 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 
6730 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6731 } 6732 for (PetscCount i = 0; i < Bnnz; i++) { 6733 PetscScalar sum = 0.0; 6734 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6735 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6736 } 6737 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6738 6739 /* Add received remote entries to A and B */ 6740 for (PetscCount i = 0; i < Annz2; i++) { 6741 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6742 } 6743 for (PetscCount i = 0; i < Bnnz2; i++) { 6744 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6745 } 6746 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6747 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6748 PetscFunctionReturn(PETSC_SUCCESS); 6749 } 6750 6751 /*MC 6752 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6753 6754 Options Database Keys: 6755 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6756 6757 Level: beginner 6758 6759 Notes: 6760 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6761 in this case the values associated with the rows and columns one passes in are set to zero 6762 in the matrix 6763 6764 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6765 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6766 6767 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6768 M*/ 6769 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6770 { 6771 Mat_MPIAIJ *b; 6772 PetscMPIInt size; 6773 6774 PetscFunctionBegin; 6775 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6776 6777 PetscCall(PetscNew(&b)); 6778 B->data = (void *)b; 6779 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6780 B->assembled = PETSC_FALSE; 6781 B->insertmode = NOT_SET_VALUES; 6782 b->size = size; 6783 6784 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6785 6786 /* build cache for off array entries formed */ 6787 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6788 6789 b->donotstash = PETSC_FALSE; 6790 b->colmap = NULL; 6791 b->garray = NULL; 6792 b->roworiented = PETSC_TRUE; 6793 6794 /* stuff used for matrix vector multiply */ 6795 b->lvec = NULL; 6796 b->Mvctx = NULL; 6797 6798 /* stuff for MatGetRow() */ 6799 b->rowindices = NULL; 6800 b->rowvalues = NULL; 6801 b->getrowactive = PETSC_FALSE; 6802 6803 /* flexible pointer used in CUSPARSE classes */ 6804 b->spptr = NULL; 6805 6806 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6807 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6808 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6809 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6810 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6811 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", 
MatResetPreallocation_MPIAIJ)); 6812 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6813 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6814 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6815 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6816 #if defined(PETSC_HAVE_CUDA) 6817 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6818 #endif 6819 #if defined(PETSC_HAVE_HIP) 6820 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6821 #endif 6822 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6823 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6824 #endif 6825 #if defined(PETSC_HAVE_MKL_SPARSE) 6826 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6827 #endif 6828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6829 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6830 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6831 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6832 #if defined(PETSC_HAVE_ELEMENTAL) 6833 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6834 #endif 6835 #if defined(PETSC_HAVE_SCALAPACK) 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6837 #endif 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6840 #if defined(PETSC_HAVE_HYPRE) 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6843 #endif 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6848 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6849 PetscFunctionReturn(PETSC_SUCCESS); 6850 } 6851 6852 /*@C 6853 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6854 and "off-diagonal" part of the matrix in CSR format. 6855 6856 Collective 6857 6858 Input Parameters: 6859 + comm - MPI communicator 6860 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6861 . n - This value should be the same as the local size used in creating the 6862 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6863 calculated if `N` is given) For square matrices `n` is almost always `m`. 6864 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6865 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6866 . 
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6867 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6868 . a - matrix values 6869 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6870 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6871 - oa - matrix values 6872 6873 Output Parameter: 6874 . mat - the matrix 6875 6876 Level: advanced 6877 6878 Notes: 6879 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6880 must free the arrays once the matrix has been destroyed and not before. 6881 6882 The `i` and `j` indices are 0 based 6883 6884 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6885 6886 This sets local rows and cannot be used to set off-processor values. 6887 6888 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6889 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6890 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6891 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6892 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6893 communication if it is known that only local entries will be set. 
6894 6895 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6896 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6897 @*/ 6898 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6899 { 6900 Mat_MPIAIJ *maij; 6901 6902 PetscFunctionBegin; 6903 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6904 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6905 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6906 PetscCall(MatCreate(comm, mat)); 6907 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6908 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6909 maij = (Mat_MPIAIJ *)(*mat)->data; 6910 6911 (*mat)->preallocated = PETSC_TRUE; 6912 6913 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6914 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6915 6916 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6917 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6918 6919 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6920 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6921 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6922 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6923 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6924 PetscFunctionReturn(PETSC_SUCCESS); 6925 } 6926 6927 typedef struct { 6928 Mat *mp; /* intermediate products */ 6929 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6930 PetscInt cp; /* number of intermediate products */ 6931 6932 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6933 PetscInt *startsj_s, *startsj_r; 6934 PetscScalar *bufa; 6935 Mat P_oth; 6936 6937 /* may take advantage of merging product->B */ 6938 Mat Bloc; /* B-local by merging diag and off-diag */ 6939 6940 /* cusparse does not have support to split between symbolic and numeric phases. 6941 When api_user is true, we don't need to update the numerical values 6942 of the temporary storage */ 6943 PetscBool reusesym; 6944 6945 /* support for COO values insertion */ 6946 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6947 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6948 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6949 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6950 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6951 PetscMemType mtype; 6952 6953 /* customization */ 6954 PetscBool abmerge; 6955 PetscBool P_oth_bind; 6956 } MatMatMPIAIJBACKEND; 6957 6958 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6959 { 6960 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6961 PetscInt i; 6962 6963 PetscFunctionBegin; 6964 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6965 PetscCall(PetscFree(mmdata->bufa)); 6966 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6967 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6968 PetscCall(MatDestroy(&mmdata->P_oth)); 6969 PetscCall(MatDestroy(&mmdata->Bloc)); 6970 PetscCall(PetscSFDestroy(&mmdata->sf)); 6971 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6972 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6973 PetscCall(PetscFree(mmdata->own[0])); 6974 PetscCall(PetscFree(mmdata->own)); 6975 
PetscCall(PetscFree(mmdata->off[0])); 6976 PetscCall(PetscFree(mmdata->off)); 6977 PetscCall(PetscFree(mmdata)); 6978 PetscFunctionReturn(PETSC_SUCCESS); 6979 } 6980 6981 /* Copy selected n entries with indices in idx[] of A to v[]. 6982 If idx is NULL, copy the whole data array of A to v[] 6983 */ 6984 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6985 { 6986 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 6987 6988 PetscFunctionBegin; 6989 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 6990 if (f) { 6991 PetscCall((*f)(A, n, idx, v)); 6992 } else { 6993 const PetscScalar *vv; 6994 6995 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 6996 if (n && idx) { 6997 PetscScalar *w = v; 6998 const PetscInt *oi = idx; 6999 PetscInt j; 7000 7001 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7002 } else { 7003 PetscCall(PetscArraycpy(v, vv, n)); 7004 } 7005 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7006 } 7007 PetscFunctionReturn(PETSC_SUCCESS); 7008 } 7009 7010 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7011 { 7012 MatMatMPIAIJBACKEND *mmdata; 7013 PetscInt i, n_d, n_o; 7014 7015 PetscFunctionBegin; 7016 MatCheckProduct(C, 1); 7017 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7018 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7019 if (!mmdata->reusesym) { /* update temporary matrices */ 7020 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7021 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7022 } 7023 mmdata->reusesym = PETSC_FALSE; 7024 7025 for (i = 0; i < mmdata->cp; i++) { 7026 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing 
numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7027 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7028 } 7029 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7030 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7031 7032 if (mmdata->mptmp[i]) continue; 7033 if (noff) { 7034 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7035 7036 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7037 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7038 n_o += noff; 7039 n_d += nown; 7040 } else { 7041 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7042 7043 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7044 n_d += mm->nz; 7045 } 7046 } 7047 if (mmdata->hasoffproc) { /* offprocess insertion */ 7048 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7049 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7050 } 7051 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7052 PetscFunctionReturn(PETSC_SUCCESS); 7053 } 7054 7055 /* Support for Pt * A, A * P, or Pt * A * P */ 7056 #define MAX_NUMBER_INTERMEDIATE 4 7057 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7058 { 7059 Mat_Product *product = C->product; 7060 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7061 Mat_MPIAIJ *a, *p; 7062 MatMatMPIAIJBACKEND *mmdata; 7063 ISLocalToGlobalMapping P_oth_l2g = NULL; 7064 IS glob = NULL; 7065 const char *prefix; 7066 char pprefix[256]; 7067 const PetscInt *globidx, *P_oth_idx; 7068 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7069 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7070 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7071 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7072 /* a base offset; type-2: sparse with a local to global map table */ 7073 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7074 7075 MatProductType ptype; 7076 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7077 PetscMPIInt size; 7078 7079 PetscFunctionBegin; 7080 MatCheckProduct(C, 1); 7081 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7082 ptype = product->type; 7083 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7084 ptype = MATPRODUCT_AB; 7085 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7086 } 7087 switch (ptype) { 7088 case MATPRODUCT_AB: 7089 A = product->A; 7090 P = product->B; 7091 m = A->rmap->n; 7092 n = P->cmap->n; 7093 M = A->rmap->N; 7094 N = P->cmap->N; 7095 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7096 break; 7097 case MATPRODUCT_AtB: 7098 P = product->A; 7099 A = product->B; 7100 m = P->cmap->n; 7101 n = A->cmap->n; 7102 M = P->cmap->N; 7103 N = A->cmap->N; 7104 hasoffproc = PETSC_TRUE; 7105 break; 7106 case MATPRODUCT_PtAP: 7107 A = product->A; 7108 P = product->B; 7109 m = P->cmap->n; 7110 n = P->cmap->n; 7111 M = P->cmap->N; 7112 N = P->cmap->N; 7113 hasoffproc = PETSC_TRUE; 7114 break; 7115 default: 7116 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7117 } 7118 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7119 if (size == 1) hasoffproc = PETSC_FALSE; 7120 7121 /* defaults */ 7122 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7123 mp[i] = NULL; 7124 mptmp[i] = PETSC_FALSE; 7125 rmapt[i] = -1; 7126 cmapt[i] = -1; 7127 rmapa[i] = NULL; 7128 cmapa[i] = NULL; 7129 } 7130 7131 /* customization */ 
7132 PetscCall(PetscNew(&mmdata)); 7133 mmdata->reusesym = product->api_user; 7134 if (ptype == MATPRODUCT_AB) { 7135 if (product->api_user) { 7136 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7137 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7138 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7139 PetscOptionsEnd(); 7140 } else { 7141 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7142 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7143 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7144 PetscOptionsEnd(); 7145 } 7146 } else if (ptype == MATPRODUCT_PtAP) { 7147 if (product->api_user) { 7148 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7149 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7150 PetscOptionsEnd(); 7151 } else { 7152 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7153 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7154 PetscOptionsEnd(); 7155 } 7156 } 7157 a = (Mat_MPIAIJ *)A->data; 7158 p = (Mat_MPIAIJ *)P->data; 7159 PetscCall(MatSetSizes(C, m, n, M, N)); 7160 PetscCall(PetscLayoutSetUp(C->rmap)); 7161 PetscCall(PetscLayoutSetUp(C->cmap)); 7162 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7163 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7164 7165 cp = 0; 7166 switch (ptype) { 7167 case MATPRODUCT_AB: /* A * P */ 7168 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7169 7170 /* A_diag * P_local (merged or not) */ 7171 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7172 /* P is product->B */ 7173 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7174 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7175 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7176 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7177 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7178 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7179 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7180 mp[cp]->product->api_user = product->api_user; 7181 PetscCall(MatProductSetFromOptions(mp[cp])); 7182 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7183 PetscCall(ISGetIndices(glob, &globidx)); 7184 rmapt[cp] = 1; 7185 cmapt[cp] = 2; 7186 cmapa[cp] = globidx; 7187 mptmp[cp] = PETSC_FALSE; 7188 cp++; 7189 } else { /* A_diag * P_diag and A_diag * P_off */ 7190 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7191 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7192 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7193 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7194 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7195 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7196 mp[cp]->product->api_user = product->api_user; 7197 PetscCall(MatProductSetFromOptions(mp[cp])); 7198 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7199 rmapt[cp] = 1; 7200 cmapt[cp] = 1; 7201 mptmp[cp] = PETSC_FALSE; 7202 cp++; 7203 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7204 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7205 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7206 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7207 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7208 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7209 mp[cp]->product->api_user = product->api_user; 7210 PetscCall(MatProductSetFromOptions(mp[cp])); 7211 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7212 rmapt[cp] = 1; 7213 cmapt[cp] = 2; 7214 cmapa[cp] = p->garray; 7215 mptmp[cp] = PETSC_FALSE; 7216 cp++; 7217 } 7218 7219 /* A_off * P_other */ 7220 if (mmdata->P_oth) { 7221 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7222 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7223 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7224 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7225 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7226 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7227 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7228 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7229 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7230 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7231 mp[cp]->product->api_user = product->api_user; 7232 PetscCall(MatProductSetFromOptions(mp[cp])); 7233 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7234 rmapt[cp] = 1; 7235 cmapt[cp] = 2; 7236 cmapa[cp] = P_oth_idx; 7237 mptmp[cp] = PETSC_FALSE; 7238 cp++; 7239 } 7240 break; 7241 7242 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7243 /* A is product->B */ 7244 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7245 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7246 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7247 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7248 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7249 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7250 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7251 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7252 mp[cp]->product->api_user = product->api_user; 7253 PetscCall(MatProductSetFromOptions(mp[cp])); 7254 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7255 PetscCall(ISGetIndices(glob, &globidx)); 7256 rmapt[cp] = 2; 7257 rmapa[cp] = globidx; 7258 cmapt[cp] = 2; 7259 cmapa[cp] = globidx; 7260 mptmp[cp] = PETSC_FALSE; 7261 cp++; 7262 } else { 7263 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7264 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7265 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7266 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7267 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7268 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7269 mp[cp]->product->api_user = product->api_user; 7270 PetscCall(MatProductSetFromOptions(mp[cp])); 7271 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7272 PetscCall(ISGetIndices(glob, &globidx)); 7273 rmapt[cp] = 1; 7274 cmapt[cp] = 2; 7275 cmapa[cp] = globidx; 7276 mptmp[cp] = PETSC_FALSE; 7277 cp++; 7278 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7279 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7280 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7281 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7282 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7283 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7284 mp[cp]->product->api_user = product->api_user; 7285 PetscCall(MatProductSetFromOptions(mp[cp])); 7286 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7287 rmapt[cp] = 2; 7288 rmapa[cp] = p->garray; 
7289 cmapt[cp] = 2; 7290 cmapa[cp] = globidx; 7291 mptmp[cp] = PETSC_FALSE; 7292 cp++; 7293 } 7294 break; 7295 case MATPRODUCT_PtAP: 7296 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7297 /* P is product->B */ 7298 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7299 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7300 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7301 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7302 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7303 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7304 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7305 mp[cp]->product->api_user = product->api_user; 7306 PetscCall(MatProductSetFromOptions(mp[cp])); 7307 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7308 PetscCall(ISGetIndices(glob, &globidx)); 7309 rmapt[cp] = 2; 7310 rmapa[cp] = globidx; 7311 cmapt[cp] = 2; 7312 cmapa[cp] = globidx; 7313 mptmp[cp] = PETSC_FALSE; 7314 cp++; 7315 if (mmdata->P_oth) { 7316 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7317 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7318 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7319 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7320 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7321 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7322 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7323 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7324 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7325 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7326 mp[cp]->product->api_user = product->api_user; 7327 PetscCall(MatProductSetFromOptions(mp[cp])); 7328 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7329 
mptmp[cp] = PETSC_TRUE; 7330 cp++; 7331 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7332 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7333 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7334 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7335 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7336 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7337 mp[cp]->product->api_user = product->api_user; 7338 PetscCall(MatProductSetFromOptions(mp[cp])); 7339 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7340 rmapt[cp] = 2; 7341 rmapa[cp] = globidx; 7342 cmapt[cp] = 2; 7343 cmapa[cp] = P_oth_idx; 7344 mptmp[cp] = PETSC_FALSE; 7345 cp++; 7346 } 7347 break; 7348 default: 7349 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7350 } 7351 /* sanity check */ 7352 if (size > 1) 7353 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7354 7355 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7356 for (i = 0; i < cp; i++) { 7357 mmdata->mp[i] = mp[i]; 7358 mmdata->mptmp[i] = mptmp[i]; 7359 } 7360 mmdata->cp = cp; 7361 C->product->data = mmdata; 7362 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7363 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7364 7365 /* memory type */ 7366 mmdata->mtype = PETSC_MEMTYPE_HOST; 7367 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7368 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7369 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7370 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7371 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7372 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7373 
7374 /* prepare coo coordinates for values insertion */ 7375 7376 /* count total nonzeros of those intermediate seqaij Mats 7377 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7378 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7379 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7380 */ 7381 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7382 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7383 if (mptmp[cp]) continue; 7384 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7385 const PetscInt *rmap = rmapa[cp]; 7386 const PetscInt mr = mp[cp]->rmap->n; 7387 const PetscInt rs = C->rmap->rstart; 7388 const PetscInt re = C->rmap->rend; 7389 const PetscInt *ii = mm->i; 7390 for (i = 0; i < mr; i++) { 7391 const PetscInt gr = rmap[i]; 7392 const PetscInt nz = ii[i + 1] - ii[i]; 7393 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7394 else ncoo_oown += nz; /* this row is local */ 7395 } 7396 } else ncoo_d += mm->nz; 7397 } 7398 7399 /* 7400 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7401 7402 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7403 7404 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7405 7406 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7407 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7408 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7409 7410 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7411 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7412 */ 7413 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7414 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7415 7416 /* gather (i,j) of nonzeros inserted by remote procs */ 7417 if (hasoffproc) { 7418 PetscSF msf; 7419 PetscInt ncoo2, *coo_i2, *coo_j2; 7420 7421 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7422 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7423 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7424 7425 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7426 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7427 PetscInt *idxoff = mmdata->off[cp]; 7428 PetscInt *idxown = mmdata->own[cp]; 7429 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7430 const PetscInt *rmap = rmapa[cp]; 7431 const PetscInt *cmap = cmapa[cp]; 7432 const PetscInt *ii = mm->i; 7433 PetscInt *coi = coo_i + ncoo_o; 7434 PetscInt *coj = coo_j + ncoo_o; 7435 const PetscInt mr = mp[cp]->rmap->n; 7436 const PetscInt rs = C->rmap->rstart; 7437 const PetscInt re = C->rmap->rend; 7438 const PetscInt cs = C->cmap->rstart; 7439 for (i = 0; i < mr; i++) { 7440 const PetscInt *jj = mm->j + ii[i]; 7441 const PetscInt gr = rmap[i]; 7442 const PetscInt nz = ii[i + 1] - ii[i]; 7443 if (gr < rs || gr >= re) { /* this is an offproc row */ 7444 for (j = ii[i]; j < ii[i + 1]; j++) { 7445 *coi++ = gr; 7446 *idxoff++ = j; 7447 } 7448 if (!cmapt[cp]) { /* already global */ 7449 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7450 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7451 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7452 } else { /* offdiag */ 7453 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7454 } 7455 ncoo_o += nz; 7456 } else { /* this is a local row */ 7457 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7458 } 7459 } 7460 } 7461 mmdata->off[cp + 1] = idxoff; 7462 mmdata->own[cp + 1] = idxown; 7463 } 7464 7465 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7466 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7467 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7468 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7469 ncoo = ncoo_d + ncoo_oown + ncoo2; 7470 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7471 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7472 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7473 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7474 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7475 PetscCall(PetscFree2(coo_i, coo_j)); 7476 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7477 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7478 coo_i = coo_i2; 7479 coo_j = coo_j2; 7480 } else { /* no offproc values insertion */ 7481 ncoo = ncoo_d; 7482 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7483 7484 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7485 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7486 PetscCall(PetscSFSetUp(mmdata->sf)); 7487 } 7488 mmdata->hasoffproc = hasoffproc; 7489 7490 /* gather (i,j) of nonzeros inserted locally */ 7491 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7492 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7493 PetscInt *coi = coo_i + ncoo_d; 7494 PetscInt *coj = coo_j + ncoo_d; 7495 const PetscInt *jj = mm->j; 7496 const PetscInt *ii = mm->i; 7497 const PetscInt *cmap = 
cmapa[cp]; 7498 const PetscInt *rmap = rmapa[cp]; 7499 const PetscInt mr = mp[cp]->rmap->n; 7500 const PetscInt rs = C->rmap->rstart; 7501 const PetscInt re = C->rmap->rend; 7502 const PetscInt cs = C->cmap->rstart; 7503 7504 if (mptmp[cp]) continue; 7505 if (rmapt[cp] == 1) { /* consecutive rows */ 7506 /* fill coo_i */ 7507 for (i = 0; i < mr; i++) { 7508 const PetscInt gr = i + rs; 7509 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7510 } 7511 /* fill coo_j */ 7512 if (!cmapt[cp]) { /* type-0, already global */ 7513 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7514 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7515 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7516 } else { /* type-2, local to global for sparse columns */ 7517 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7518 } 7519 ncoo_d += mm->nz; 7520 } else if (rmapt[cp] == 2) { /* sparse rows */ 7521 for (i = 0; i < mr; i++) { 7522 const PetscInt *jj = mm->j + ii[i]; 7523 const PetscInt gr = rmap[i]; 7524 const PetscInt nz = ii[i + 1] - ii[i]; 7525 if (gr >= rs && gr < re) { /* local rows */ 7526 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7527 if (!cmapt[cp]) { /* type-0, already global */ 7528 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7529 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7530 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7531 } else { /* type-2, local to global for sparse columns */ 7532 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7533 } 7534 ncoo_d += nz; 7535 } 7536 } 7537 } 7538 } 7539 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7540 PetscCall(ISDestroy(&glob)); 7541 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7542 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7543 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7544 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7545 7546 /* preallocate with COO data */ 7547 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7548 PetscCall(PetscFree2(coo_i, coo_j)); 7549 PetscFunctionReturn(PETSC_SUCCESS); 7550 } 7551 7552 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7553 { 7554 Mat_Product *product = mat->product; 7555 #if defined(PETSC_HAVE_DEVICE) 7556 PetscBool match = PETSC_FALSE; 7557 PetscBool usecpu = PETSC_FALSE; 7558 #else 7559 PetscBool match = PETSC_TRUE; 7560 #endif 7561 7562 PetscFunctionBegin; 7563 MatCheckProduct(mat, 1); 7564 #if defined(PETSC_HAVE_DEVICE) 7565 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7566 if (match) { /* we can always fallback to the CPU if requested */ 7567 switch (product->type) { 7568 case MATPRODUCT_AB: 7569 if (product->api_user) { 7570 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7571 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7572 PetscOptionsEnd(); 7573 } else { 7574 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7575 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7576 PetscOptionsEnd(); 7577 } 7578 break; 7579 case MATPRODUCT_AtB: 7580 if (product->api_user) { 7581 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7582 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7583 PetscOptionsEnd(); 7584 } else { 7585 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7586 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7587 PetscOptionsEnd(); 7588 } 7589 break; 7590 case MATPRODUCT_PtAP: 7591 if (product->api_user) { 7592 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7593 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7594 PetscOptionsEnd(); 7595 } else { 7596 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7597 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7598 PetscOptionsEnd(); 7599 } 7600 break; 7601 default: 7602 break; 7603 } 7604 match = (PetscBool)!usecpu; 7605 } 7606 #endif 7607 if (match) { 7608 switch (product->type) { 7609 case MATPRODUCT_AB: 7610 case MATPRODUCT_AtB: 7611 case MATPRODUCT_PtAP: 7612 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7613 break; 7614 default: 7615 break; 7616 } 7617 } 7618 /* fallback to MPIAIJ ops */ 7619 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7620 PetscFunctionReturn(PETSC_SUCCESS); 7621 } 7622 7623 /* 7624 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7625 7626 n - the number of block indices in cc[] 7627 cc - the block indices (must be large enough to contain the indices) 7628 */ 7629 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7630 { 7631 PetscInt cnt = -1, nidx, j; 7632 const PetscInt *idx; 7633 7634 PetscFunctionBegin; 7635 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7636 if (nidx) { 7637 cnt = 0; 7638 cc[cnt] = idx[0] / bs; 7639 for (j = 1; j < nidx; j++) { 7640 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7641 } 7642 } 7643 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7644 *n = cnt + 1; 7645 PetscFunctionReturn(PETSC_SUCCESS); 7646 } 7647 7648 /* 7649 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7650 7651 ncollapsed - the number of block indices 7652 collapsed - the block indices (must be large enough to contain the indices) 7653 */ 7654 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7655 { 7656 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7657 7658 PetscFunctionBegin; 7659 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7660 for (i = start + 1; i < start + bs; i++) { 7661 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7662 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7663 cprevtmp = cprev; 7664 cprev = merged; 7665 merged = cprevtmp; 7666 } 7667 *ncollapsed = nprev; 7668 if (collapsed) *collapsed = cprev; 7669 PetscFunctionReturn(PETSC_SUCCESS); 7670 } 7671 7672 /* 7673 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7674 */ 7675 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7676 { 7677 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7678 Mat tGmat; 7679 MPI_Comm comm; 7680 const PetscScalar *vals; 7681 const PetscInt *idx; 7682 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7683 MatScalar *AA; // this is checked in graph 7684 PetscBool isseqaij; 7685 Mat a, b, c; 7686 MatType jtype; 7687 7688 PetscFunctionBegin; 7689 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7690 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7691 PetscCall(MatGetType(Gmat, &jtype)); 7692 PetscCall(MatCreate(comm, &tGmat)); 7693 PetscCall(MatSetType(tGmat, jtype)); 7694 7695 /* TODO GPU: this can be called when filter = 0 -> Probably 
provide MatAIJThresholdCompress that compresses the entries below a threshold? 7696 Also, if the matrix is symmetric, can we skip this 7697 operation? It can be very expensive on large matrices. */ 7698 7699 // global sizes 7700 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7701 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7702 nloc = Iend - Istart; 7703 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7704 if (isseqaij) { 7705 a = Gmat; 7706 b = NULL; 7707 } else { 7708 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7709 a = d->A; 7710 b = d->B; 7711 garray = d->garray; 7712 } 7713 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7714 for (PetscInt row = 0; row < nloc; row++) { 7715 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7716 d_nnz[row] = ncols; 7717 if (ncols > maxcols) maxcols = ncols; 7718 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7719 } 7720 if (b) { 7721 for (PetscInt row = 0; row < nloc; row++) { 7722 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7723 o_nnz[row] = ncols; 7724 if (ncols > maxcols) maxcols = ncols; 7725 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7726 } 7727 } 7728 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7729 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7730 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7731 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7732 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7733 PetscCall(PetscFree2(d_nnz, o_nnz)); 7734 // 7735 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7736 nnz0 = nnz1 = 0; 7737 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7738 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7739 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7740 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7741 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7742 if (PetscRealPart(sv) > vfilter) { 7743 nnz1++; 7744 PetscInt cid = idx[jj] + Istart; 
//diag 7745 if (c != a) cid = garray[idx[jj]]; 7746 AA[ncol_row] = vals[jj]; 7747 AJ[ncol_row] = cid; 7748 ncol_row++; 7749 } 7750 } 7751 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7752 PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7753 } 7754 } 7755 PetscCall(PetscFree2(AA, AJ)); 7756 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 7757 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7758 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7759 7760 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7761 7762 *filteredG = tGmat; 7763 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7764 PetscFunctionReturn(PETSC_SUCCESS); 7765 } 7766 7767 /* 7768 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7769 7770 Input Parameter: 7771 . Amat - matrix 7772 - symmetrize - make the result symmetric 7773 + scale - scale with diagonal 7774 7775 Output Parameter: 7776 . 
a_Gmat - output scalar graph >= 0 7777 7778 */ 7779 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7780 { 7781 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7782 MPI_Comm comm; 7783 Mat Gmat; 7784 PetscBool ismpiaij, isseqaij; 7785 Mat a, b, c; 7786 MatType jtype; 7787 7788 PetscFunctionBegin; 7789 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7790 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7791 PetscCall(MatGetSize(Amat, &MM, &NN)); 7792 PetscCall(MatGetBlockSize(Amat, &bs)); 7793 nloc = (Iend - Istart) / bs; 7794 7795 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7796 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7797 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7798 7799 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7800 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7801 implementation */ 7802 if (bs > 1) { 7803 PetscCall(MatGetType(Amat, &jtype)); 7804 PetscCall(MatCreate(comm, &Gmat)); 7805 PetscCall(MatSetType(Gmat, jtype)); 7806 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7807 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7808 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7809 PetscInt *d_nnz, *o_nnz; 7810 MatScalar *aa, val, *AA; 7811 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7812 if (isseqaij) { 7813 a = Amat; 7814 b = NULL; 7815 } else { 7816 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7817 a = d->A; 7818 b = d->B; 7819 } 7820 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7821 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7822 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7823 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7824 const PetscInt *cols1, *cols2; 7825 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7826 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7827 nnz[brow / bs] = nc2 / bs; 7828 if (nc2 % bs) ok = 0; 7829 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7830 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7831 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7832 if (nc1 != nc2) ok = 0; 7833 else { 7834 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7835 if (cols1[jj] != cols2[jj]) ok = 0; 7836 if (cols1[jj] % bs != jj % bs) ok = 0; 7837 } 7838 } 7839 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7840 } 7841 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7842 if (!ok) { 7843 PetscCall(PetscFree2(d_nnz, o_nnz)); 7844 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7845 goto old_bs; 7846 } 7847 } 7848 } 7849 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7850 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7851 PetscCall(PetscFree2(d_nnz, o_nnz)); 7852 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7853 // diag 7854 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7855 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7856 ai = aseq->i; 7857 n = ai[brow + 1] - ai[brow]; 7858 aj = aseq->j + ai[brow]; 7859 for (int k = 0; k < n; k += bs) { // block columns 7860 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7861 val = 0; 7862 for (int ii = 0; ii < bs; ii++) { // rows in block 7863 aa = aseq->a + ai[brow + ii] + k; 7864 for (int jj = 0; jj < bs; jj++) { // columns in block 7865 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7866 } 7867 } 7868 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7869 AA[k / bs] = val; 7870 } 7871 grow = Istart / bs + brow / bs; 7872 PetscCall(MatSetValues(Gmat, 1, 
&grow, n / bs, AJ, AA, INSERT_VALUES)); 7873 } 7874 // off-diag 7875 if (ismpiaij) { 7876 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7877 const PetscScalar *vals; 7878 const PetscInt *cols, *garray = aij->garray; 7879 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7880 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7881 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7882 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7883 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7884 AA[k / bs] = 0; 7885 AJ[cidx] = garray[cols[k]] / bs; 7886 } 7887 nc = ncols / bs; 7888 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7889 for (int ii = 0; ii < bs; ii++) { // rows in block 7890 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7891 for (int k = 0; k < ncols; k += bs) { 7892 for (int jj = 0; jj < bs; jj++) { // cols in block 7893 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7894 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7895 } 7896 } 7897 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7898 } 7899 grow = Istart / bs + brow / bs; 7900 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7901 } 7902 } 7903 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7904 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7905 PetscCall(PetscFree2(AA, AJ)); 7906 } else { 7907 const PetscScalar *vals; 7908 const PetscInt *idx; 7909 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7910 old_bs: 7911 /* 7912 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7913 */ 7914 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7915 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7916 if (isseqaij) { 7917 PetscInt max_d_nnz; 7918 /* 7919 Determine exact preallocation count for (sequential) scalar matrix 7920 */ 7921 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7922 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7923 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7924 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7925 PetscCall(PetscFree3(w0, w1, w2)); 7926 } else if (ismpiaij) { 7927 Mat Daij, Oaij; 7928 const PetscInt *garray; 7929 PetscInt max_d_nnz; 7930 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7931 /* 7932 Determine exact preallocation count for diagonal block portion of scalar matrix 7933 */ 7934 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7935 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7936 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7937 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7938 PetscCall(PetscFree3(w0, w1, w2)); 7939 /* 7940 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7941 */ 7942 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7943 o_nnz[jj] = 0; 7944 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7945 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7946 o_nnz[jj] += ncols; 7947 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7948 } 7949 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7950 } 7951 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7952 /* get scalar copy (norms) of matrix */ 7953 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7954 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7955 PetscCall(PetscFree2(d_nnz, o_nnz)); 7956 for (Ii = Istart; Ii < Iend; Ii++) { 7957 
PetscInt dest_row = Ii / bs; 7958 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7959 for (jj = 0; jj < ncols; jj++) { 7960 PetscInt dest_col = idx[jj] / bs; 7961 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7962 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7963 } 7964 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7965 } 7966 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7967 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7968 } 7969 } else { 7970 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7971 else { 7972 Gmat = Amat; 7973 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7974 } 7975 if (isseqaij) { 7976 a = Gmat; 7977 b = NULL; 7978 } else { 7979 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7980 a = d->A; 7981 b = d->B; 7982 } 7983 if (filter >= 0 || scale) { 7984 /* take absolute value of each entry */ 7985 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7986 MatInfo info; 7987 PetscScalar *avals; 7988 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7989 PetscCall(MatSeqAIJGetArray(c, &avals)); 7990 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7991 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7992 } 7993 } 7994 } 7995 if (symmetrize) { 7996 PetscBool isset, issym; 7997 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7998 if (!isset || !issym) { 7999 Mat matTrans; 8000 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8001 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8002 PetscCall(MatDestroy(&matTrans)); 8003 } 8004 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8005 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8006 if (scale) { 8007 /* scale c for all diagonal values = 1 or -1 */ 8008 Vec diag; 8009 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8010 PetscCall(MatGetDiagonal(Gmat, diag)); 8011 PetscCall(VecReciprocal(diag)); 8012 PetscCall(VecSqrtAbs(diag)); 8013 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8014 PetscCall(VecDestroy(&diag)); 8015 } 8016 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8017 8018 if (filter >= 0) { 8019 Mat Fmat = NULL; /* some silly compiler needs this */ 8020 8021 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8022 PetscCall(MatDestroy(&Gmat)); 8023 Gmat = Fmat; 8024 } 8025 *a_Gmat = Gmat; 8026 PetscFunctionReturn(PETSC_SUCCESS); 8027 } 8028 8029 /* 8030 Special version for direct calls from Fortran 8031 */ 8032 #include <petsc/private/fortranimpl.h> 8033 8034 /* Change these macros so can be used in void function */ 8035 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8036 #undef PetscCall 8037 #define PetscCall(...) \ 8038 do { \ 8039 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8040 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8041 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8042 return; \ 8043 } \ 8044 } while (0) 8045 8046 #undef SETERRQ 8047 #define SETERRQ(comm, ierr, ...) 
\ 8048 do { \ 8049 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8050 return; \ 8051 } while (0) 8052 8053 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8054 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8055 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8056 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8057 #else 8058 #endif 8059 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8060 { 8061 Mat mat = *mmat; 8062 PetscInt m = *mm, n = *mn; 8063 InsertMode addv = *maddv; 8064 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8065 PetscScalar value; 8066 8067 MatCheckPreallocated(mat, 1); 8068 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8069 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8070 { 8071 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8072 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8073 PetscBool roworiented = aij->roworiented; 8074 8075 /* Some Variables required in the macro */ 8076 Mat A = aij->A; 8077 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8078 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8079 MatScalar *aa; 8080 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8081 Mat B = aij->B; 8082 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8083 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8084 MatScalar *ba; 8085 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8086 * cannot use "#if defined" inside a macro. 
*/ 8087 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8088 8089 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8090 PetscInt nonew = a->nonew; 8091 MatScalar *ap1, *ap2; 8092 8093 PetscFunctionBegin; 8094 PetscCall(MatSeqAIJGetArray(A, &aa)); 8095 PetscCall(MatSeqAIJGetArray(B, &ba)); 8096 for (i = 0; i < m; i++) { 8097 if (im[i] < 0) continue; 8098 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8099 if (im[i] >= rstart && im[i] < rend) { 8100 row = im[i] - rstart; 8101 lastcol1 = -1; 8102 rp1 = aj + ai[row]; 8103 ap1 = aa + ai[row]; 8104 rmax1 = aimax[row]; 8105 nrow1 = ailen[row]; 8106 low1 = 0; 8107 high1 = nrow1; 8108 lastcol2 = -1; 8109 rp2 = bj + bi[row]; 8110 ap2 = ba + bi[row]; 8111 rmax2 = bimax[row]; 8112 nrow2 = bilen[row]; 8113 low2 = 0; 8114 high2 = nrow2; 8115 8116 for (j = 0; j < n; j++) { 8117 if (roworiented) value = v[i * n + j]; 8118 else value = v[i + j * m]; 8119 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8120 if (in[j] >= cstart && in[j] < cend) { 8121 col = in[j] - cstart; 8122 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8123 } else if (in[j] < 0) continue; 8124 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8125 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8126 } else { 8127 if (mat->was_assembled) { 8128 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8129 #if defined(PETSC_USE_CTABLE) 8130 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8131 col--; 8132 #else 8133 col = aij->colmap[in[j]] - 1; 8134 #endif 8135 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8136 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8137 col = in[j]; 8138 /* Reinitialize the 
variables required by MatSetValues_SeqAIJ_B_Private() */ 8139 B = aij->B; 8140 b = (Mat_SeqAIJ *)B->data; 8141 bimax = b->imax; 8142 bi = b->i; 8143 bilen = b->ilen; 8144 bj = b->j; 8145 rp2 = bj + bi[row]; 8146 ap2 = ba + bi[row]; 8147 rmax2 = bimax[row]; 8148 nrow2 = bilen[row]; 8149 low2 = 0; 8150 high2 = nrow2; 8151 bm = aij->B->rmap->n; 8152 ba = b->a; 8153 inserted = PETSC_FALSE; 8154 } 8155 } else col = in[j]; 8156 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8157 } 8158 } 8159 } else if (!aij->donotstash) { 8160 if (roworiented) { 8161 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8162 } else { 8163 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8164 } 8165 } 8166 } 8167 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8168 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8169 } 8170 PetscFunctionReturnVoid(); 8171 } 8172 8173 /* Undefining these here since they were redefined from their original definition above! No 8174 * other PETSc functions should be defined past this point, as it is impossible to recover the 8175 * original definitions */ 8176 #undef PetscCall 8177 #undef SETERRQ 8178