#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity; see the example usage below.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner
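   Example Usage:
   A minimal sketch, assuming an MPI communicator comm and global dimensions M by N are in scope;
   the preallocation counts shown are illustrative placeholders, not recommendations:
.vb
   Mat A;

   PetscCall(MatCreate(comm, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
   PetscCall(MatSetType(A, MATAIJ));
   // calling both preallocation routines keeps the code independent of the number of processes
   PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
   PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
.ve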
   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and the type also automatically switches over to use inodes when
   enough exist.

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * It may seem a little odd for a MatBindToCPU() call to do this, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  PetscInt i, m, n, *garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* these get/restore pairs only ensure the host copies of the values are current,
     since the loops below read a_aij->a and b_aij->a directly */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
type"); 317 if (type == NORM_INFINITY) { 318 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 319 } else { 320 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 321 } 322 PetscCall(PetscFree(work)); 323 if (type == NORM_2) { 324 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 325 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 326 for (i = 0; i < n; i++) reductions[i] /= m; 327 } 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 332 { 333 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 334 IS sis, gis; 335 const PetscInt *isis, *igis; 336 PetscInt n, *iis, nsis, ngis, rstart, i; 337 338 PetscFunctionBegin; 339 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 340 PetscCall(MatFindNonzeroRows(a->B, &gis)); 341 PetscCall(ISGetSize(gis, &ngis)); 342 PetscCall(ISGetSize(sis, &nsis)); 343 PetscCall(ISGetIndices(sis, &isis)); 344 PetscCall(ISGetIndices(gis, &igis)); 345 346 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 347 PetscCall(PetscArraycpy(iis, igis, ngis)); 348 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 349 n = ngis + nsis; 350 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 351 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 352 for (i = 0; i < n; i++) iis[i] += rstart; 353 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 354 355 PetscCall(ISRestoreIndices(sis, &isis)); 356 PetscCall(ISRestoreIndices(gis, &igis)); 357 PetscCall(ISDestroy(&sis)); 358 PetscCall(ISDestroy(&gis)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to access. 
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar *aa, *ba;
  PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt nonew;
  MatScalar *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt diag_so_far = 0, dnz;
  PetscInt offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
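/*
  Worked example of the split performed above (illustrative values, not from any test):
  with cstart = 10 and cend = 20, a row with global columns {3, 12, 15, 25} is split as

    diagonal part:     aj gets {2, 5}  (local columns 12 - cstart and 15 - cstart), ailen[row] = 2
    off-diagonal part: bj gets {3, 25} (still global here; they are translated to local
                       indices against garray when the matrix is assembled, see
                       MatSetUpMultiply_MPIAIJ()),                                  bilen[row] = 2
*/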
/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt n;
  PetscInt i, j, rstart, ncols, flg;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any process has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt n = A->rmap->n;
  PetscInt i, j, r, m, len = 0;
  PetscInt *lrows, *owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb, *mask, *aij_a;
  Vec xmask, lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt *aj, *ii, *ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
[0,%" PetscInt_FMT ")", idx, A->rmap->N); 948 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 949 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 950 } 951 rrows[r].rank = p; 952 rrows[r].index = rows[r] - owners[p]; 953 } 954 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 955 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 956 /* Collect flags for rows to be zeroed */ 957 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 958 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 959 PetscCall(PetscSFDestroy(&sf)); 960 /* Compress and put in row numbers */ 961 for (r = 0; r < n; ++r) 962 if (lrows[r] >= 0) lrows[len++] = r; 963 /* zero diagonal part of matrix */ 964 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 965 /* handle off diagonal part of matrix */ 966 PetscCall(MatCreateVecs(A, &xmask, NULL)); 967 PetscCall(VecDuplicate(l->lvec, &lmask)); 968 PetscCall(VecGetArray(xmask, &bb)); 969 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 970 PetscCall(VecRestoreArray(xmask, &bb)); 971 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 972 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 973 PetscCall(VecDestroy(&xmask)); 974 if (x && b) { /* this code is buggy when the row and column layout don't match */ 975 PetscBool cong; 976 977 PetscCall(MatHasCongruentLayouts(A, &cong)); 978 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 979 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 980 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 981 PetscCall(VecGetArrayRead(l->lvec, &xx)); 982 PetscCall(VecGetArray(b, &bb)); 983 } 984 PetscCall(VecGetArray(lmask, &mask)); 985 /* remove zeroed rows of off diagonal matrix */ 986 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 987 ii = aij->i; 988 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); 989 /* loop over all elements of off process part of matrix zeroing removed columns*/ 990 if (aij->compressedrow.use) { 991 m = aij->compressedrow.nrows; 992 ii = aij->compressedrow.i; 993 ridx = aij->compressedrow.rindex; 994 for (i = 0; i < m; i++) { 995 n = ii[i + 1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij_a + ii[i]; 998 999 for (j = 0; j < n; j++) { 1000 if (PetscAbsScalar(mask[*aj])) { 1001 if (b) bb[*ridx] -= *aa * xx[*aj]; 1002 *aa = 0.0; 1003 } 1004 aa++; 1005 aj++; 1006 } 1007 ridx++; 1008 } 1009 } else { /* do not use compressed row format */ 1010 m = l->B->rmap->n; 1011 for (i = 0; i < m; i++) { 1012 n = ii[i + 1] - ii[i]; 1013 aj = aij->j + ii[i]; 1014 aa = aij_a + ii[i]; 1015 for (j = 0; j < n; j++) { 1016 if (PetscAbsScalar(mask[*aj])) { 1017 if (b) bb[i] -= *aa * xx[*aj]; 1018 *aa = 0.0; 1019 } 1020 aa++; 1021 aj++; 1022 } 1023 } 1024 } 1025 if (x && b) { 1026 PetscCall(VecRestoreArray(b, &bb)); 1027 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1028 } 1029 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1030 PetscCall(VecRestoreArray(lmask, &mask)); 1031 PetscCall(VecDestroy(&lmask)); 1032 PetscCall(PetscFree(lrows)); 1033 1034 /* only change matrix nonzero state if pattern was allowed to be changed */ 1035 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1036 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* overlap communication of the ghost values with the product against the diagonal block */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS Me, Notme;
  PetscInt M, N, first, last, *notme, i;
  PetscBool lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
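/*
  For context, a minimal sketch of the COO assembly path whose work arrays are freed above
  (ncoo, coo_i, coo_j, and coo_v are caller-provided; this is not code from this file):

    PetscCall(MatSetPreallocationCOO(A, ncoo, coo_i, coo_j)); // builds the maps/buffers freed by MatResetPreallocationCOO_MPIAIJ()
    PetscCall(MatSetValuesCOO(A, coo_v, INSERT_VALUES));      // repeatable with new values for the same pattern
*/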
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64 nz, hnz;
  PetscInt *rowlens;
  PetscInt *colidxs;
  PetscScalar *matvals;
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M = mat->rmap->N;
  N = mat->cmap->N;
  m = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header: [MAT_FILE_CLASSID, M, N, total number of nonzeros] */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices, merging the off-diagonal entries left of the
     diagonal block, then the diagonal block, then the remaining off-diagonal entries,
     so each row is written in ascending global column order */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values in the same order */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt rank = aij->rank, size = aij->size;
  PetscBool isdraw, iascii, isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;
  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1351 PetscCall(MatView(aij->A, viewer)); 1352 } else { 1353 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1354 } 1355 PetscFunctionReturn(PETSC_SUCCESS); 1356 } else if (iascii && size == 1) { 1357 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1358 PetscCall(MatView(aij->A, viewer)); 1359 PetscFunctionReturn(PETSC_SUCCESS); 1360 } else if (isdraw) { 1361 PetscDraw draw; 1362 PetscBool isnull; 1363 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1364 PetscCall(PetscDrawIsNull(draw, &isnull)); 1365 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1366 } 1367 1368 { /* assemble the entire matrix onto first processor */ 1369 Mat A = NULL, Av; 1370 IS isrow, iscol; 1371 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1373 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1374 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1375 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1376 /* The commented code uses MatCreateSubMatrices instead */ 1377 /* 1378 Mat *AA, A = NULL, Av; 1379 IS isrow,iscol; 1380 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1382 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1383 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1384 if (rank == 0) { 1385 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1386 A = AA[0]; 1387 Av = AA[0]; 1388 } 1389 PetscCall(MatDestroySubMatrices(1,&AA)); 1390 */ 1391 PetscCall(ISDestroy(&iscol)); 1392 PetscCall(ISDestroy(&isrow)); 1393 /* 1394 Everyone has to call to draw the matrix since the graphics waits are 1395 synchronized across all processors that share the PetscDraw object 1396 */ 1397 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1398 if (rank == 0) { 1399 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1400 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1401 } 1402 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1403 PetscCall(PetscViewerFlush(viewer)); 1404 PetscCall(MatDestroy(&A)); 1405 } 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1410 { 1411 PetscBool iascii, isdraw, issocket, isbinary; 1412 1413 PetscFunctionBegin; 1414 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1418 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1419 PetscFunctionReturn(PETSC_SUCCESS); 1420 } 1421 1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1423 { 1424 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1425 Vec bb1 = NULL; 1426 PetscBool hasop; 1427 1428 PetscFunctionBegin; 1429 if (flag == SOR_APPLY_UPPER) { 1430 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1431 PetscFunctionReturn(PETSC_SUCCESS); 
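  /*
     The remaining cases couple the processes. Each outer iteration below follows the
     same pattern, a block Jacobi step across processes with SOR as the local solver;
     a sketch of one iteration of the local-symmetric branch:

        scatter xx -> mat->lvec                  (refresh ghost values of the iterate)
        bb1 = bb - B*lvec                        (fold off-process coupling into the rhs)
        SOR(A, bb1, SOR_SYMMETRIC_SWEEP) -> xx   (purely local sweep)
  */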
1432 } 1433 1434 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1435 1436 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1437 if (flag & SOR_ZERO_INITIAL_GUESS) { 1438 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1439 its--; 1440 } 1441 1442 while (its--) { 1443 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1444 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1445 1446 /* update rhs: bb1 = bb - B*x */ 1447 PetscCall(VecScale(mat->lvec, -1.0)); 1448 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1449 1450 /* local sweep */ 1451 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1452 } 1453 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1454 if (flag & SOR_ZERO_INITIAL_GUESS) { 1455 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1456 its--; 1457 } 1458 while (its--) { 1459 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1460 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1461 1462 /* update rhs: bb1 = bb - B*x */ 1463 PetscCall(VecScale(mat->lvec, -1.0)); 1464 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1465 1466 /* local sweep */ 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1468 } 1469 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1470 if (flag & SOR_ZERO_INITIAL_GUESS) { 1471 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1472 its--; 1473 } 1474 while (its--) { 1475 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1476 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1477 1478 /* update rhs: bb1 = bb - B*x */ 1479 PetscCall(VecScale(mat->lvec, -1.0)); 1480 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1481 1482 /* local sweep */ 1483 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1484 } 1485 } else if (flag & SOR_EISENSTAT) { 1486 Vec xx1; 1487 1488 PetscCall(VecDuplicate(bb, &xx1)); 1489 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1490 1491 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1492 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1493 if (!mat->diag) { 1494 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1495 PetscCall(MatGetDiagonal(matin, mat->diag)); 1496 } 1497 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1498 if (hasop) { 1499 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1500 } else { 1501 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1502 } 1503 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1504 1505 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1506 1507 /* local sweep */ 1508 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1509 PetscCall(VecAXPY(xx, 1.0, xx1)); 1510 PetscCall(VecDestroy(&xx1)); 1511 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1512 1513 PetscCall(VecDestroy(&bb1)); 1514 1515 matin->factorerrortype = 
mat->A->factorerrortype; 1516 PetscFunctionReturn(PETSC_SUCCESS); 1517 } 1518 1519 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1520 { 1521 Mat aA, aB, Aperm; 1522 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1523 PetscScalar *aa, *ba; 1524 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1525 PetscSF rowsf, sf; 1526 IS parcolp = NULL; 1527 PetscBool done; 1528 1529 PetscFunctionBegin; 1530 PetscCall(MatGetLocalSize(A, &m, &n)); 1531 PetscCall(ISGetIndices(rowp, &rwant)); 1532 PetscCall(ISGetIndices(colp, &cwant)); 1533 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1534 1535 /* Invert row permutation to find out where my rows should go */ 1536 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1537 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1538 PetscCall(PetscSFSetFromOptions(rowsf)); 1539 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1540 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1541 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1542 1543 /* Invert column permutation to find out where my columns should go */ 1544 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1545 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1546 PetscCall(PetscSFSetFromOptions(sf)); 1547 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1548 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1549 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1550 PetscCall(PetscSFDestroy(&sf)); 1551 1552 PetscCall(ISRestoreIndices(rowp, &rwant)); 1553 PetscCall(ISRestoreIndices(colp, &cwant)); 1554 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1555 1556 /* Find out where my gcols should go */ 1557 PetscCall(MatGetSize(aB, NULL, &ng)); 1558 PetscCall(PetscMalloc1(ng, &gcdest)); 1559 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1560 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1561 PetscCall(PetscSFSetFromOptions(sf)); 1562 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1563 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1564 PetscCall(PetscSFDestroy(&sf)); 1565 1566 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1567 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1568 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1569 for (i = 0; i < m; i++) { 1570 PetscInt row = rdest[i]; 1571 PetscMPIInt rowner; 1572 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1573 for (j = ai[i]; j < ai[i + 1]; j++) { 1574 PetscInt col = cdest[aj[j]]; 1575 PetscMPIInt cowner; 1576 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1577 if (rowner == cowner) dnnz[i]++; 1578 else onnz[i]++; 1579 } 1580 for (j = bi[i]; j < bi[i + 1]; j++) { 1581 PetscInt col = gcdest[bj[j]]; 1582 PetscMPIInt cowner; 1583 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1584 if (rowner == cowner) dnnz[i]++; 1585 else onnz[i]++; 1586 } 1587 } 1588 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1589 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1590 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, 
MPI_REPLACE)); 1591 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1592 PetscCall(PetscSFDestroy(&rowsf)); 1593 1594 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1595 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1596 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1597 for (i = 0; i < m; i++) { 1598 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1599 PetscInt j0, rowlen; 1600 rowlen = ai[i + 1] - ai[i]; 1601 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1602 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1603 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1604 } 1605 rowlen = bi[i + 1] - bi[i]; 1606 for (j0 = j = 0; j < rowlen; j0 = j) { 1607 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1608 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1609 } 1610 } 1611 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1612 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1613 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1614 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1615 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1616 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1617 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1618 PetscCall(PetscFree3(work, rdest, cdest)); 1619 PetscCall(PetscFree(gcdest)); 1620 if (parcolp) PetscCall(ISDestroy(&colp)); 1621 *B = Aperm; 1622 PetscFunctionReturn(PETSC_SUCCESS); 1623 } 1624 1625 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1626 { 1627 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1628 1629 PetscFunctionBegin; 1630 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1631 if (ghosts) *ghosts = aij->garray; 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1636 { 1637 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1638 Mat A = mat->A, B = mat->B; 1639 PetscLogDouble isend[5], irecv[5]; 1640 1641 PetscFunctionBegin; 1642 info->block_size = 1.0; 1643 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1644 1645 isend[0] = info->nz_used; 1646 isend[1] = info->nz_allocated; 1647 isend[2] = info->nz_unneeded; 1648 isend[3] = info->memory; 1649 isend[4] = info->mallocs; 1650 1651 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1652 1653 isend[0] += info->nz_used; 1654 isend[1] += info->nz_allocated; 1655 isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; 1657 isend[4] += info->mallocs; 1658 if (flag == MAT_LOCAL) { 1659 info->nz_used = isend[0]; 1660 info->nz_allocated = isend[1]; 1661 info->nz_unneeded = isend[2]; 1662 info->memory = isend[3]; 1663 info->mallocs = isend[4]; 1664 } else if (flag == MAT_GLOBAL_MAX) { 1665 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1666 1667 info->nz_used = irecv[0]; 1668 info->nz_allocated = irecv[1]; 1669 info->nz_unneeded = irecv[2]; 1670 info->memory = irecv[3]; 1671 info->mallocs = irecv[4]; 1672 } else if (flag == MAT_GLOBAL_SUM) { 1673 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1674 1675 info->nz_used = irecv[0]; 1676 info->nz_allocated = irecv[1]; 1677 
info->nz_unneeded = irecv[2]; 1678 info->memory = irecv[3]; 1679 info->mallocs = irecv[4]; 1680 } 1681 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1682 info->fill_ratio_needed = 0; 1683 info->factor_mallocs = 0; 1684 PetscFunctionReturn(PETSC_SUCCESS); 1685 } 1686 1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 case MAT_FORM_EXPLICIT_TRANSPOSE: 1701 MatCheckPreallocated(A, 1); 1702 PetscCall(MatSetOption(a->A, op, flg)); 1703 PetscCall(MatSetOption(a->B, op, flg)); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 MatCheckPreallocated(A, 1); 1707 a->roworiented = flg; 1708 1709 PetscCall(MatSetOption(a->A, op, flg)); 1710 PetscCall(MatSetOption(a->B, op, flg)); 1711 break; 1712 case MAT_FORCE_DIAGONAL_ENTRIES: 1713 case MAT_SORTED_FULL: 1714 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1715 break; 1716 case MAT_IGNORE_OFF_PROC_ENTRIES: 1717 a->donotstash = flg; 1718 break; 1719 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1720 case MAT_SPD: 1721 case MAT_SYMMETRIC: 1722 case MAT_STRUCTURALLY_SYMMETRIC: 1723 case MAT_HERMITIAN: 1724 case MAT_SYMMETRY_ETERNAL: 1725 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1726 case MAT_SPD_ETERNAL: 1727 /* if the diagonal matrix is square it inherits some of the properties above */ 1728 break; 1729 case MAT_SUBMAT_SINGLEIS: 1730 A->submat_singleis = flg; 1731 break; 1732 case MAT_STRUCTURE_ONLY: 1733 /* The option is handled directly by MatSetOption() */ 1734 break; 1735 default: 1736 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1737 } 1738 PetscFunctionReturn(PETSC_SUCCESS); 1739 } 1740 1741 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1744 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1745 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1746 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1747 PetscInt *cmap, *idx_p; 1748 1749 PetscFunctionBegin; 1750 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1751 mat->getrowactive = PETSC_TRUE; 1752 1753 if (!mat->rowvalues && (idx || v)) { 1754 /* 1755 allocate enough space to hold information from the longest row. 
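       The scan below takes the maximum, over all local rows, of the combined row
       length of the diagonal (A) and off-diagonal (B) blocks; rowvalues/rowindices
       are allocated once at that size and reused by subsequent MatGetRow() calls.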
1756 */ 1757 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1758 PetscInt max = 1, tmp; 1759 for (i = 0; i < matin->rmap->n; i++) { 1760 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1761 if (max < tmp) max = tmp; 1762 } 1763 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1764 } 1765 1766 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1767 lrow = row - rstart; 1768 1769 pvA = &vworkA; 1770 pcA = &cworkA; 1771 pvB = &vworkB; 1772 pcB = &cworkB; 1773 if (!v) { 1774 pvA = NULL; 1775 pvB = NULL; 1776 } 1777 if (!idx) { 1778 pcA = NULL; 1779 if (!v) pcB = NULL; 1780 } 1781 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1783 nztot = nzA + nzB; 1784 1785 cmap = mat->garray; 1786 if (v || idx) { 1787 if (nztot) { 1788 /* Sort by increasing column numbers, assuming A and B already sorted */ 1789 PetscInt imark = -1; 1790 if (v) { 1791 *v = v_p = mat->rowvalues; 1792 for (i = 0; i < nzB; i++) { 1793 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1794 else break; 1795 } 1796 imark = i; 1797 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1798 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1799 } 1800 if (idx) { 1801 *idx = idx_p = mat->rowindices; 1802 if (imark > -1) { 1803 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1804 } else { 1805 for (i = 0; i < nzB; i++) { 1806 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1807 else break; 1808 } 1809 imark = i; 1810 } 1811 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1812 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1813 } 1814 } else { 1815 if (idx) *idx = NULL; 1816 if (v) *v = NULL; 1817 } 1818 } 1819 *nz = nztot; 1820 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1821 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1826 { 1827 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1828 1829 PetscFunctionBegin; 1830 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1831 aij->getrowactive = PETSC_FALSE; 1832 PetscFunctionReturn(PETSC_SUCCESS); 1833 } 1834 1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1838 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1839 PetscInt i, j, cstart = mat->cmap->rstart; 1840 PetscReal sum = 0.0; 1841 const MatScalar *v, *amata, *bmata; 1842 1843 PetscFunctionBegin; 1844 if (aij->size == 1) { 1845 PetscCall(MatNorm(aij->A, type, norm)); 1846 } else { 1847 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1848 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1849 if (type == NORM_FROBENIUS) { 1850 v = amata; 1851 for (i = 0; i < amat->nz; i++) { 1852 sum += PetscRealPart(PetscConj(*v) * (*v)); 1853 v++; 1854 } 1855 v = bmata; 1856 for (i = 0; i < bmat->nz; i++) { 1857 sum += PetscRealPart(PetscConj(*v) * (*v)); 1858 v++; 1859 } 1860 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1861 *norm = PetscSqrtReal(*norm); 1862 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1863 } else if (type == NORM_1) { /* max column 
norm */ 1864 PetscReal *tmp, *tmp2; 1865 PetscInt *jj, *garray = aij->garray; 1866 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1867 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1868 *norm = 0.0; 1869 v = amata; 1870 jj = amat->j; 1871 for (j = 0; j < amat->nz; j++) { 1872 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1873 v++; 1874 } 1875 v = bmata; 1876 jj = bmat->j; 1877 for (j = 0; j < bmat->nz; j++) { 1878 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1879 v++; 1880 } 1881 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1882 for (j = 0; j < mat->cmap->N; j++) { 1883 if (tmp2[j] > *norm) *norm = tmp2[j]; 1884 } 1885 PetscCall(PetscFree(tmp)); 1886 PetscCall(PetscFree(tmp2)); 1887 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1888 } else if (type == NORM_INFINITY) { /* max row norm */ 1889 PetscReal ntemp = 0.0; 1890 for (j = 0; j < aij->A->rmap->n; j++) { 1891 v = amata + amat->i[j]; 1892 sum = 0.0; 1893 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1894 sum += PetscAbsScalar(*v); 1895 v++; 1896 } 1897 v = bmata + bmat->i[j]; 1898 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1899 sum += PetscAbsScalar(*v); 1900 v++; 1901 } 1902 if (sum > ntemp) ntemp = sum; 1903 } 1904 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1905 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1906 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1907 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1908 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1909 } 1910 PetscFunctionReturn(PETSC_SUCCESS); 1911 } 1912 1913 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1914 { 1915 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1916 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1917 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1918 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1919 Mat B, A_diag, *B_diag; 1920 const MatScalar *pbv, *bv; 1921 1922 PetscFunctionBegin; 1923 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1924 ma = A->rmap->n; 1925 na = A->cmap->n; 1926 mb = a->B->rmap->n; 1927 nb = a->B->cmap->n; 1928 ai = Aloc->i; 1929 aj = Aloc->j; 1930 bi = Bloc->i; 1931 bj = Bloc->j; 1932 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1933 PetscInt *d_nnz, *g_nnz, *o_nnz; 1934 PetscSFNode *oloc; 1935 PETSC_UNUSED PetscSF sf; 1936 1937 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1938 /* compute d_nnz for preallocation */ 1939 PetscCall(PetscArrayzero(d_nnz, na)); 1940 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1941 /* compute local off-diagonal contributions */ 1942 PetscCall(PetscArrayzero(g_nnz, nb)); 1943 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1944 /* map those to global */ 1945 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1946 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1947 PetscCall(PetscSFSetFromOptions(sf)); 1948 PetscCall(PetscArrayzero(o_nnz, na)); 1949 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1950 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1951 PetscCall(PetscSFDestroy(&sf)); 1952 1953 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1954 
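    /* the transpose swaps the row and column layouts (and block sizes) of A */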
PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1955 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1956 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1957 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1958 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1959 } else { 1960 B = *matout; 1961 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1962 } 1963 1964 b = (Mat_MPIAIJ *)B->data; 1965 A_diag = a->A; 1966 B_diag = &b->A; 1967 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1968 A_diag_ncol = A_diag->cmap->N; 1969 B_diag_ilen = sub_B_diag->ilen; 1970 B_diag_i = sub_B_diag->i; 1971 1972 /* Set ilen for diagonal of B */ 1973 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1974 1975 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1976 very quickly (=without using MatSetValues), because all writes are local. */ 1977 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1978 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1979 1980 /* copy over the B part */ 1981 PetscCall(PetscMalloc1(bi[mb], &cols)); 1982 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1983 pbv = bv; 1984 row = A->rmap->rstart; 1985 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1986 cols_tmp = cols; 1987 for (i = 0; i < mb; i++) { 1988 ncol = bi[i + 1] - bi[i]; 1989 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1990 row++; 1991 pbv += ncol; 1992 cols_tmp += ncol; 1993 } 1994 PetscCall(PetscFree(cols)); 1995 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1996 1997 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1998 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1999 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2000 *matout = B; 2001 } else { 2002 PetscCall(MatHeaderMerge(A, &B)); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 2008 { 2009 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2010 Mat a = aij->A, b = aij->B; 2011 PetscInt s1, s2, s3; 2012 2013 PetscFunctionBegin; 2014 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 2015 if (rr) { 2016 PetscCall(VecGetLocalSize(rr, &s1)); 2017 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2018 /* Overlap communication with computation. 
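       The scatter of rr into aij->lvec is only begun here; the diagonal block is
       scaled while the message is in flight, and the scatter is completed below just
       before the off-diagonal block is right-scaled.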
*/ 2019 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2020 } 2021 if (ll) { 2022 PetscCall(VecGetLocalSize(ll, &s1)); 2023 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2024 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2025 } 2026 /* scale the diagonal block */ 2027 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2028 2029 if (rr) { 2030 /* Do a scatter end and then right scale the off-diagonal block */ 2031 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2032 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2033 } 2034 PetscFunctionReturn(PETSC_SUCCESS); 2035 } 2036 2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2038 { 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2040 2041 PetscFunctionBegin; 2042 PetscCall(MatSetUnfactored(a->A)); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2047 { 2048 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2049 Mat a, b, c, d; 2050 PetscBool flg; 2051 2052 PetscFunctionBegin; 2053 a = matA->A; 2054 b = matA->B; 2055 c = matB->A; 2056 d = matB->B; 2057 2058 PetscCall(MatEqual(a, c, &flg)); 2059 if (flg) PetscCall(MatEqual(b, d, &flg)); 2060 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2065 { 2066 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2067 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2068 2069 PetscFunctionBegin; 2070 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2071 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2072 /* because of the column compression in the off-processor part of the matrix a->B, 2073 the number of columns in a->B and b->B may be different, hence we cannot call 2074 the MatCopy() directly on the two parts. If need be, we can provide a more 2075 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2076 then copying the submatrices */ 2077 PetscCall(MatCopy_Basic(A, B, str)); 2078 } else { 2079 PetscCall(MatCopy(a->A, b->A, str)); 2080 PetscCall(MatCopy(a->B, b->B, str)); 2081 } 2082 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 /* 2087 Computes the number of nonzeros per row needed for preallocation when X and Y 2088 have different nonzero structure. 
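   For one row, this is the size of the union of the two sorted column sets. As a
   small worked example (hypothetical indices): X = {0, 3, 7} and Y = {3, 5} merge to
   {0, 3, 5, 7}, so nnz = 4; the two-pointer loop below counts the Y-only columns
   while catching up to each X column, skips the duplicate, and then flushes the
   remaining tail of Y.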
2089 */ 2090 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2091 { 2092 PetscInt i, j, k, nzx, nzy; 2093 2094 PetscFunctionBegin; 2095 /* Set the number of nonzeros in the new matrix */ 2096 for (i = 0; i < m; i++) { 2097 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2098 nzx = xi[i + 1] - xi[i]; 2099 nzy = yi[i + 1] - yi[i]; 2100 nnz[i] = 0; 2101 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2102 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2103 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2104 nnz[i]++; 2105 } 2106 for (; k < nzy; k++) nnz[i]++; 2107 } 2108 PetscFunctionReturn(PETSC_SUCCESS); 2109 } 2110 2111 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2112 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2113 { 2114 PetscInt m = Y->rmap->N; 2115 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2116 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2117 2118 PetscFunctionBegin; 2119 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 2123 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2124 { 2125 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2126 2127 PetscFunctionBegin; 2128 if (str == SAME_NONZERO_PATTERN) { 2129 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2130 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2131 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2132 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2133 } else { 2134 Mat B; 2135 PetscInt *nnz_d, *nnz_o; 2136 2137 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2138 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2139 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2140 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2141 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2142 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2143 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2144 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2145 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2146 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2147 PetscCall(MatHeaderMerge(Y, &B)); 2148 PetscCall(PetscFree(nnz_d)); 2149 PetscCall(PetscFree(nnz_o)); 2150 } 2151 PetscFunctionReturn(PETSC_SUCCESS); 2152 } 2153 2154 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2155 2156 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2157 { 2158 PetscFunctionBegin; 2159 if (PetscDefined(USE_COMPLEX)) { 2160 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2161 2162 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2163 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2164 } 2165 PetscFunctionReturn(PETSC_SUCCESS); 2166 } 2167 2168 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2169 { 2170 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2171 2172 PetscFunctionBegin; 2173 PetscCall(MatRealPart(a->A)); 2174 PetscCall(MatRealPart(a->B)); 2175 PetscFunctionReturn(PETSC_SUCCESS); 2176 } 2177 2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2179 { 2180 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2181 2182 PetscFunctionBegin; 2183 PetscCall(MatImaginaryPart(a->A)); 
2184 PetscCall(MatImaginaryPart(a->B)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2189 { 2190 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2191 PetscInt i, *idxb = NULL, m = A->rmap->n; 2192 PetscScalar *va, *vv; 2193 Vec vB, vA; 2194 const PetscScalar *vb; 2195 2196 PetscFunctionBegin; 2197 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2198 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2199 2200 PetscCall(VecGetArrayWrite(vA, &va)); 2201 if (idx) { 2202 for (i = 0; i < m; i++) { 2203 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2204 } 2205 } 2206 2207 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2208 PetscCall(PetscMalloc1(m, &idxb)); 2209 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2210 2211 PetscCall(VecGetArrayWrite(v, &vv)); 2212 PetscCall(VecGetArrayRead(vB, &vb)); 2213 for (i = 0; i < m; i++) { 2214 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2215 vv[i] = vb[i]; 2216 if (idx) idx[i] = a->garray[idxb[i]]; 2217 } else { 2218 vv[i] = va[i]; 2219 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2220 } 2221 } 2222 PetscCall(VecRestoreArrayWrite(v, &vv)); 2223 PetscCall(VecRestoreArrayWrite(vA, &va)); 2224 PetscCall(VecRestoreArrayRead(vB, &vb)); 2225 PetscCall(PetscFree(idxb)); 2226 PetscCall(VecDestroy(&vA)); 2227 PetscCall(VecDestroy(&vB)); 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2232 { 2233 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2234 PetscInt m = A->rmap->n, n = A->cmap->n; 2235 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2236 PetscInt *cmap = mat->garray; 2237 PetscInt *diagIdx, *offdiagIdx; 2238 Vec diagV, offdiagV; 2239 PetscScalar *a, *diagA, *offdiagA; 2240 const PetscScalar *ba, *bav; 2241 PetscInt r, j, col, ncols, *bi, *bj; 2242 Mat B = mat->B; 2243 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2244 2245 PetscFunctionBegin; 2246 /* When a process holds entire A and other processes have no entry */ 2247 if (A->cmap->N == n) { 2248 PetscCall(VecGetArrayWrite(v, &diagA)); 2249 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2250 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2251 PetscCall(VecDestroy(&diagV)); 2252 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2253 PetscFunctionReturn(PETSC_SUCCESS); 2254 } else if (n == 0) { 2255 if (m) { 2256 PetscCall(VecGetArrayWrite(v, &a)); 2257 for (r = 0; r < m; r++) { 2258 a[r] = 0.0; 2259 if (idx) idx[r] = -1; 2260 } 2261 PetscCall(VecRestoreArrayWrite(v, &a)); 2262 } 2263 PetscFunctionReturn(PETSC_SUCCESS); 2264 } 2265 2266 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2267 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2268 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2269 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2270 2271 /* Get offdiagIdx[] for implicit 0.0 */ 2272 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2273 ba = bav; 2274 bi = b->i; 2275 bj = b->j; 2276 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2277 for (r = 0; r < m; r++) { 2278 ncols = bi[r + 1] - bi[r]; 2279 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2280 offdiagA[r] = *ba; 2281 offdiagIdx[r] = cmap[0]; 2282 } else { /* Brow is sparse, so we already KNOW the minimum in absolute value is 0.0 */ 2283 offdiagA[r] = 0.0; 2284 2285 /* Find first hole in the cmap */ 2286 for (j = 0; j < ncols; j++) { 2287 col = cmap[bj[j]]; /* global column
number = cmap[B column number] */ 2288 if (col > j && j < cstart) { 2289 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2290 break; 2291 } else if (col > j + n && j >= cstart) { 2292 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2293 break; 2294 } 2295 } 2296 if (j == ncols && ncols < A->cmap->N - n) { 2297 /* a hole is outside compressed Bcols */ 2298 if (ncols == 0) { 2299 if (cstart) { 2300 offdiagIdx[r] = 0; 2301 } else offdiagIdx[r] = cend; 2302 } else { /* ncols > 0 */ 2303 offdiagIdx[r] = cmap[ncols - 1] + 1; 2304 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2305 } 2306 } 2307 } 2308 2309 for (j = 0; j < ncols; j++) { 2310 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2311 offdiagA[r] = *ba; 2312 offdiagIdx[r] = cmap[*bj]; 2313 } 2314 ba++; 2315 bj++; 2316 } 2317 } 2318 2319 PetscCall(VecGetArrayWrite(v, &a)); 2320 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2321 for (r = 0; r < m; ++r) { 2322 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2323 a[r] = diagA[r]; 2324 if (idx) idx[r] = cstart + diagIdx[r]; 2325 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2326 a[r] = diagA[r]; 2327 if (idx) { 2328 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2329 idx[r] = cstart + diagIdx[r]; 2330 } else idx[r] = offdiagIdx[r]; 2331 } 2332 } else { 2333 a[r] = offdiagA[r]; 2334 if (idx) idx[r] = offdiagIdx[r]; 2335 } 2336 } 2337 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2338 PetscCall(VecRestoreArrayWrite(v, &a)); 2339 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2340 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2341 PetscCall(VecDestroy(&diagV)); 2342 PetscCall(VecDestroy(&offdiagV)); 2343 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2344 PetscFunctionReturn(PETSC_SUCCESS); 2345 } 2346 2347 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2348 { 2349 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2350 PetscInt m = A->rmap->n, n = A->cmap->n; 2351 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2352 PetscInt *cmap = mat->garray; 2353 PetscInt *diagIdx, *offdiagIdx; 2354 Vec diagV, offdiagV; 2355 PetscScalar *a, *diagA, *offdiagA; 2356 const PetscScalar *ba, *bav; 2357 PetscInt r, j, col, ncols, *bi, *bj; 2358 Mat B = mat->B; 2359 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2360 2361 PetscFunctionBegin; 2362 /* When a process holds entire A and other processes have no entry */ 2363 if (A->cmap->N == n) { 2364 PetscCall(VecGetArrayWrite(v, &diagA)); 2365 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2366 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2367 PetscCall(VecDestroy(&diagV)); 2368 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2369 PetscFunctionReturn(PETSC_SUCCESS); 2370 } else if (n == 0) { 2371 if (m) { 2372 PetscCall(VecGetArrayWrite(v, &a)); 2373 for (r = 0; r < m; r++) { 2374 a[r] = PETSC_MAX_REAL; 2375 if (idx) idx[r] = -1; 2376 } 2377 PetscCall(VecRestoreArrayWrite(v, &a)); 2378 } 2379 PetscFunctionReturn(PETSC_SUCCESS); 2380 } 2381 2382 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2383 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2384 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2385 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2386 2387 /* Get offdiagIdx[] for implicit 0.0 */ 2388 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2389 ba = bav; 2390 bi = b->i; 2391 bj = b->j; 2392 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2393 for (r = 0; r < m; r++) { 2394 
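    /* a row of B stores only its explicit nonzeros, in compressed (garray) column
       numbering; every off-diagonal global column absent from the row is an implicit
       0.0 that competes in the minimum, which the hole search below accounts for */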
ncols = bi[r + 1] - bi[r]; 2395 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2396 offdiagA[r] = *ba; 2397 offdiagIdx[r] = cmap[0]; 2398 } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower */ 2399 offdiagA[r] = 0.0; 2400 2401 /* Find first hole in the cmap */ 2402 for (j = 0; j < ncols; j++) { 2403 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2404 if (col > j && j < cstart) { 2405 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2406 break; 2407 } else if (col > j + n && j >= cstart) { 2408 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2409 break; 2410 } 2411 } 2412 if (j == ncols && ncols < A->cmap->N - n) { 2413 /* a hole is outside compressed Bcols */ 2414 if (ncols == 0) { 2415 if (cstart) { 2416 offdiagIdx[r] = 0; 2417 } else offdiagIdx[r] = cend; 2418 } else { /* ncols > 0 */ 2419 offdiagIdx[r] = cmap[ncols - 1] + 1; 2420 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2421 } 2422 } 2423 } 2424 2425 for (j = 0; j < ncols; j++) { 2426 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2427 offdiagA[r] = *ba; 2428 offdiagIdx[r] = cmap[*bj]; 2429 } 2430 ba++; 2431 bj++; 2432 } 2433 } 2434 2435 PetscCall(VecGetArrayWrite(v, &a)); 2436 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2437 for (r = 0; r < m; ++r) { 2438 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2439 a[r] = diagA[r]; 2440 if (idx) idx[r] = cstart + diagIdx[r]; 2441 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2442 a[r] = diagA[r]; 2443 if (idx) { 2444 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2445 idx[r] = cstart + diagIdx[r]; 2446 } else idx[r] = offdiagIdx[r]; 2447 } 2448 } else { 2449 a[r] = offdiagA[r]; 2450 if (idx) idx[r] = offdiagIdx[r]; 2451 } 2452 } 2453 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2454 PetscCall(VecRestoreArrayWrite(v, &a)); 2455 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2456 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2457 PetscCall(VecDestroy(&diagV)); 2458 PetscCall(VecDestroy(&offdiagV)); 2459 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2460 PetscFunctionReturn(PETSC_SUCCESS); 2461 } 2462 2463 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2464 { 2465 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2466 PetscInt m = A->rmap->n, n = A->cmap->n; 2467 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2468 PetscInt *cmap = mat->garray; 2469 PetscInt *diagIdx, *offdiagIdx; 2470 Vec diagV, offdiagV; 2471 PetscScalar *a, *diagA, *offdiagA; 2472 const PetscScalar *ba, *bav; 2473 PetscInt r, j, col, ncols, *bi, *bj; 2474 Mat B = mat->B; 2475 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2476 2477 PetscFunctionBegin; 2478 /* When a process holds entire A and other processes have no entry */ 2479 if (A->cmap->N == n) { 2480 PetscCall(VecGetArrayWrite(v, &diagA)); 2481 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2482 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2483 PetscCall(VecDestroy(&diagV)); 2484 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2485 PetscFunctionReturn(PETSC_SUCCESS); 2486 } else if (n == 0) { 2487 if (m) { 2488 PetscCall(VecGetArrayWrite(v, &a)); 2489 for (r = 0; r < m; r++) { 2490 a[r] = PETSC_MIN_REAL; 2491 if (idx) idx[r] = -1; 2492 } 2493 PetscCall(VecRestoreArrayWrite(v, &a)); 2494 } 2495 PetscFunctionReturn(PETSC_SUCCESS); 2496 } 2497 2498 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2499 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
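  /* the row maxima of the diagonal and off-diagonal blocks are computed separately
     into these work vectors and merged afterwards, ties going to the smaller global
     column index */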
2500 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2501 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2502 2503 /* Get offdiagIdx[] for implicit 0.0 */ 2504 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2505 ba = bav; 2506 bi = b->i; 2507 bj = b->j; 2508 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2509 for (r = 0; r < m; r++) { 2510 ncols = bi[r + 1] - bi[r]; 2511 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2512 offdiagA[r] = *ba; 2513 offdiagIdx[r] = cmap[0]; 2514 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2515 offdiagA[r] = 0.0; 2516 2517 /* Find first hole in the cmap */ 2518 for (j = 0; j < ncols; j++) { 2519 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2520 if (col > j && j < cstart) { 2521 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2522 break; 2523 } else if (col > j + n && j >= cstart) { 2524 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2525 break; 2526 } 2527 } 2528 if (j == ncols && ncols < A->cmap->N - n) { 2529 /* a hole is outside compressed Bcols */ 2530 if (ncols == 0) { 2531 if (cstart) { 2532 offdiagIdx[r] = 0; 2533 } else offdiagIdx[r] = cend; 2534 } else { /* ncols > 0 */ 2535 offdiagIdx[r] = cmap[ncols - 1] + 1; 2536 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2537 } 2538 } 2539 } 2540 2541 for (j = 0; j < ncols; j++) { 2542 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2543 offdiagA[r] = *ba; 2544 offdiagIdx[r] = cmap[*bj]; 2545 } 2546 ba++; 2547 bj++; 2548 } 2549 } 2550 2551 PetscCall(VecGetArrayWrite(v, &a)); 2552 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2553 for (r = 0; r < m; ++r) { 2554 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2555 a[r] = diagA[r]; 2556 if (idx) idx[r] = cstart + diagIdx[r]; 2557 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2558 a[r] = diagA[r]; 2559 if (idx) { 2560 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2561 idx[r] = cstart + diagIdx[r]; 2562 } else idx[r] = offdiagIdx[r]; 2563 } 2564 } else { 2565 a[r] = offdiagA[r]; 2566 if (idx) idx[r] = offdiagIdx[r]; 2567 } 2568 } 2569 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2570 PetscCall(VecRestoreArrayWrite(v, &a)); 2571 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2572 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2573 PetscCall(VecDestroy(&diagV)); 2574 PetscCall(VecDestroy(&offdiagV)); 2575 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2576 PetscFunctionReturn(PETSC_SUCCESS); 2577 } 2578 2579 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2580 { 2581 Mat *dummy; 2582 2583 PetscFunctionBegin; 2584 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2585 *newmat = *dummy; 2586 PetscCall(PetscFree(dummy)); 2587 PetscFunctionReturn(PETSC_SUCCESS); 2588 } 2589 2590 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2591 { 2592 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2593 2594 PetscFunctionBegin; 2595 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2596 A->factorerrortype = a->A->factorerrortype; 2597 PetscFunctionReturn(PETSC_SUCCESS); 2598 } 2599 2600 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2601 { 2602 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2603 2604 PetscFunctionBegin; 2605 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ 
is not allowed"); 2606 PetscCall(MatSetRandom(aij->A, rctx)); 2607 if (x->assembled) { 2608 PetscCall(MatSetRandom(aij->B, rctx)); 2609 } else { 2610 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2611 } 2612 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2613 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2614 PetscFunctionReturn(PETSC_SUCCESS); 2615 } 2616 2617 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2618 { 2619 PetscFunctionBegin; 2620 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2621 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2622 PetscFunctionReturn(PETSC_SUCCESS); 2623 } 2624 2625 /*@ 2626 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2627 2628 Not Collective 2629 2630 Input Parameter: 2631 . A - the matrix 2632 2633 Output Parameter: 2634 . nz - the number of nonzeros 2635 2636 Level: advanced 2637 2638 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ` 2639 @*/ 2640 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2641 { 2642 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2643 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2644 2645 PetscFunctionBegin; 2646 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 /*@ 2651 MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap 2652 2653 Collective 2654 2655 Input Parameters: 2656 + A - the matrix 2657 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2658 2659 Level: advanced 2660 2661 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ` 2662 @*/ 2663 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2664 { 2665 PetscFunctionBegin; 2666 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2667 PetscFunctionReturn(PETSC_SUCCESS); 2668 } 2669 2670 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2671 { 2672 PetscBool sc = PETSC_FALSE, flg; 2673 2674 PetscFunctionBegin; 2675 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2676 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2677 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2678 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2679 PetscOptionsHeadEnd(); 2680 PetscFunctionReturn(PETSC_SUCCESS); 2681 } 2682 2683 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2684 { 2685 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2686 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2687 2688 PetscFunctionBegin; 2689 if (!Y->preallocated) { 2690 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2691 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation.
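     MatSeqAIJSetPreallocation() below resets the nonew flag, so it is saved and
     restored around the call.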
*/ 2692 PetscInt nonew = aij->nonew; 2693 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2694 aij->nonew = nonew; 2695 } 2696 PetscCall(MatShift_Basic(Y, a)); 2697 PetscFunctionReturn(PETSC_SUCCESS); 2698 } 2699 2700 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2701 { 2702 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2703 2704 PetscFunctionBegin; 2705 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2706 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2707 if (d) { 2708 PetscInt rstart; 2709 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2710 *d += rstart; 2711 } 2712 PetscFunctionReturn(PETSC_SUCCESS); 2713 } 2714 2715 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2716 { 2717 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2718 2719 PetscFunctionBegin; 2720 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2725 { 2726 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2727 2728 PetscFunctionBegin; 2729 PetscCall(MatEliminateZeros(a->A)); 2730 PetscCall(MatEliminateZeros(a->B)); 2731 PetscFunctionReturn(PETSC_SUCCESS); 2732 } 2733 2734 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2735 MatGetRow_MPIAIJ, 2736 MatRestoreRow_MPIAIJ, 2737 MatMult_MPIAIJ, 2738 /* 4*/ MatMultAdd_MPIAIJ, 2739 MatMultTranspose_MPIAIJ, 2740 MatMultTransposeAdd_MPIAIJ, 2741 NULL, 2742 NULL, 2743 NULL, 2744 /*10*/ NULL, 2745 NULL, 2746 NULL, 2747 MatSOR_MPIAIJ, 2748 MatTranspose_MPIAIJ, 2749 /*15*/ MatGetInfo_MPIAIJ, 2750 MatEqual_MPIAIJ, 2751 MatGetDiagonal_MPIAIJ, 2752 MatDiagonalScale_MPIAIJ, 2753 MatNorm_MPIAIJ, 2754 /*20*/ MatAssemblyBegin_MPIAIJ, 2755 MatAssemblyEnd_MPIAIJ, 2756 MatSetOption_MPIAIJ, 2757 MatZeroEntries_MPIAIJ, 2758 /*24*/ MatZeroRows_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*29*/ MatSetUp_MPI_Hash, 2764 NULL, 2765 NULL, 2766 MatGetDiagonalBlock_MPIAIJ, 2767 NULL, 2768 /*34*/ MatDuplicate_MPIAIJ, 2769 NULL, 2770 NULL, 2771 NULL, 2772 NULL, 2773 /*39*/ MatAXPY_MPIAIJ, 2774 MatCreateSubMatrices_MPIAIJ, 2775 MatIncreaseOverlap_MPIAIJ, 2776 MatGetValues_MPIAIJ, 2777 MatCopy_MPIAIJ, 2778 /*44*/ MatGetRowMax_MPIAIJ, 2779 MatScale_MPIAIJ, 2780 MatShift_MPIAIJ, 2781 MatDiagonalSet_MPIAIJ, 2782 MatZeroRowsColumns_MPIAIJ, 2783 /*49*/ MatSetRandom_MPIAIJ, 2784 MatGetRowIJ_MPIAIJ, 2785 MatRestoreRowIJ_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*54*/ MatFDColoringCreate_MPIXAIJ, 2789 NULL, 2790 MatSetUnfactored_MPIAIJ, 2791 MatPermute_MPIAIJ, 2792 NULL, 2793 /*59*/ MatCreateSubMatrix_MPIAIJ, 2794 MatDestroy_MPIAIJ, 2795 MatView_MPIAIJ, 2796 NULL, 2797 NULL, 2798 /*64*/ NULL, 2799 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2800 NULL, 2801 NULL, 2802 NULL, 2803 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2804 MatGetRowMinAbs_MPIAIJ, 2805 NULL, 2806 NULL, 2807 NULL, 2808 NULL, 2809 /*75*/ MatFDColoringApply_AIJ, 2810 MatSetFromOptions_MPIAIJ, 2811 NULL, 2812 NULL, 2813 MatFindZeroDiagonals_MPIAIJ, 2814 /*80*/ NULL, 2815 NULL, 2816 NULL, 2817 /*83*/ MatLoad_MPIAIJ, 2818 MatIsSymmetric_MPIAIJ, 2819 NULL, 2820 NULL, 2821 NULL, 2822 NULL, 2823 /*89*/ NULL, 2824 NULL, 2825 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 NULL, 2828 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2829 NULL, 2830 NULL, 2831 NULL, 2832 MatBindToCPU_MPIAIJ, 2833 /*99*/ MatProductSetFromOptions_MPIAIJ, 2834 NULL, 2835 NULL, 2836 MatConjugate_MPIAIJ, 2837 NULL, 
2838 /*104*/ MatSetValuesRow_MPIAIJ, 2839 MatRealPart_MPIAIJ, 2840 MatImaginaryPart_MPIAIJ, 2841 NULL, 2842 NULL, 2843 /*109*/ NULL, 2844 NULL, 2845 MatGetRowMin_MPIAIJ, 2846 NULL, 2847 MatMissingDiagonal_MPIAIJ, 2848 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2849 NULL, 2850 MatGetGhosts_MPIAIJ, 2851 NULL, 2852 NULL, 2853 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2854 NULL, 2855 NULL, 2856 NULL, 2857 MatGetMultiProcBlock_MPIAIJ, 2858 /*124*/ MatFindNonzeroRows_MPIAIJ, 2859 MatGetColumnReductions_MPIAIJ, 2860 MatInvertBlockDiagonal_MPIAIJ, 2861 MatInvertVariableBlockDiagonal_MPIAIJ, 2862 MatCreateSubMatricesMPI_MPIAIJ, 2863 /*129*/ NULL, 2864 NULL, 2865 NULL, 2866 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2867 NULL, 2868 /*134*/ NULL, 2869 NULL, 2870 NULL, 2871 NULL, 2872 NULL, 2873 /*139*/ MatSetBlockSizes_MPIAIJ, 2874 NULL, 2875 NULL, 2876 MatFDColoringSetUp_MPIXAIJ, 2877 MatFindOffBlockDiagonalEntries_MPIAIJ, 2878 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2879 /*145*/ NULL, 2880 NULL, 2881 NULL, 2882 MatCreateGraph_Simple_AIJ, 2883 NULL, 2884 /*150*/ NULL, 2885 MatEliminateZeros_MPIAIJ}; 2886 2887 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2888 { 2889 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2890 2891 PetscFunctionBegin; 2892 PetscCall(MatStoreValues(aij->A)); 2893 PetscCall(MatStoreValues(aij->B)); 2894 PetscFunctionReturn(PETSC_SUCCESS); 2895 } 2896 2897 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2898 { 2899 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2900 2901 PetscFunctionBegin; 2902 PetscCall(MatRetrieveValues(aij->A)); 2903 PetscCall(MatRetrieveValues(aij->B)); 2904 PetscFunctionReturn(PETSC_SUCCESS); 2905 } 2906 2907 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2908 { 2909 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2910 PetscMPIInt size; 2911 2912 PetscFunctionBegin; 2913 if (B->hash_active) { 2914 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2915 B->hash_active = PETSC_FALSE; 2916 } 2917 PetscCall(PetscLayoutSetUp(B->rmap)); 2918 PetscCall(PetscLayoutSetUp(B->cmap)); 2919 2920 #if defined(PETSC_USE_CTABLE) 2921 PetscCall(PetscHMapIDestroy(&b->colmap)); 2922 #else 2923 PetscCall(PetscFree(b->colmap)); 2924 #endif 2925 PetscCall(PetscFree(b->garray)); 2926 PetscCall(VecDestroy(&b->lvec)); 2927 PetscCall(VecScatterDestroy(&b->Mvctx)); 2928 2929 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2930 PetscCall(MatDestroy(&b->B)); 2931 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2932 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2933 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2934 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2935 2936 PetscCall(MatDestroy(&b->A)); 2937 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2938 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2939 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2940 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2941 2942 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2943 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2944 B->preallocated = PETSC_TRUE; 2945 B->was_assembled = PETSC_FALSE; 2946 B->assembled = PETSC_FALSE; 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2951 { 2952 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2953 2954 PetscFunctionBegin; 2955 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2956 PetscCall(PetscLayoutSetUp(B->rmap)); 2957 PetscCall(PetscLayoutSetUp(B->cmap)); 2958 2959 #if defined(PETSC_USE_CTABLE) 2960 PetscCall(PetscHMapIDestroy(&b->colmap)); 2961 #else 2962 PetscCall(PetscFree(b->colmap)); 2963 #endif 2964 PetscCall(PetscFree(b->garray)); 2965 PetscCall(VecDestroy(&b->lvec)); 2966 PetscCall(VecScatterDestroy(&b->Mvctx)); 2967 2968 PetscCall(MatResetPreallocation(b->A)); 2969 PetscCall(MatResetPreallocation(b->B)); 2970 B->preallocated = PETSC_TRUE; 2971 B->was_assembled = PETSC_FALSE; 2972 B->assembled = PETSC_FALSE; 2973 PetscFunctionReturn(PETSC_SUCCESS); 2974 } 2975 2976 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2977 { 2978 Mat mat; 2979 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2980 2981 PetscFunctionBegin; 2982 *newmat = NULL; 2983 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2984 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2985 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2986 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2987 a = (Mat_MPIAIJ *)mat->data; 2988 2989 mat->factortype = matin->factortype; 2990 mat->assembled = matin->assembled; 2991 mat->insertmode = NOT_SET_VALUES; 2992 mat->preallocated = matin->preallocated; 2993 2994 a->size = oldmat->size; 2995 a->rank = oldmat->rank; 2996 a->donotstash = oldmat->donotstash; 2997 a->roworiented = oldmat->roworiented; 2998 a->rowindices = NULL; 2999 a->rowvalues = NULL; 3000 a->getrowactive = PETSC_FALSE; 3001 3002 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3003 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3004 3005 if (oldmat->colmap) { 3006 #if defined(PETSC_USE_CTABLE) 3007 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3008 #else 3009 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3010 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3011 #endif 3012 } else a->colmap = NULL; 3013 if (oldmat->garray) { 3014 PetscInt len; 3015 len = oldmat->B->cmap->n; 3016 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3017 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3018 } else a->garray = NULL; 3019 3020 /* It may happen MatDuplicate is called with a non-assembled matrix 3021 In fact, MatDuplicate only requires the matrix to be preallocated 3022 This may happen inside a DMCreateMatrix_Shell */ 3023 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3024 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3025 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3026 PetscCall(MatDuplicate(oldmat->B, 
cpvalues, &a->B)); 3027 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3028 *newmat = mat; 3029 PetscFunctionReturn(PETSC_SUCCESS); 3030 } 3031 3032 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3033 { 3034 PetscBool isbinary, ishdf5; 3035 3036 PetscFunctionBegin; 3037 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3038 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3039 /* force binary viewer to load .info file if it has not yet done so */ 3040 PetscCall(PetscViewerSetUp(viewer)); 3041 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3042 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3043 if (isbinary) { 3044 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3045 } else if (ishdf5) { 3046 #if defined(PETSC_HAVE_HDF5) 3047 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3048 #else 3049 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3050 #endif 3051 } else { 3052 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3053 } 3054 PetscFunctionReturn(PETSC_SUCCESS); 3055 } 3056 3057 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3058 { 3059 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3060 PetscInt *rowidxs, *colidxs; 3061 PetscScalar *matvals; 3062 3063 PetscFunctionBegin; 3064 PetscCall(PetscViewerSetUp(viewer)); 3065 3066 /* read in matrix header */ 3067 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3068 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3069 M = header[1]; 3070 N = header[2]; 3071 nz = header[3]; 3072 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3073 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3074 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3075 3076 /* set block sizes from the viewer's .info file */ 3077 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3078 /* set global sizes if not set already */ 3079 if (mat->rmap->N < 0) mat->rmap->N = M; 3080 if (mat->cmap->N < 0) mat->cmap->N = N; 3081 PetscCall(PetscLayoutSetUp(mat->rmap)); 3082 PetscCall(PetscLayoutSetUp(mat->cmap)); 3083 3084 /* check if the matrix sizes are correct */ 3085 PetscCall(MatGetSize(mat, &rows, &cols)); 3086 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3087 3088 /* read in row lengths and build row indices */ 3089 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3090 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3091 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3092 rowidxs[0] = 0; 3093 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3094 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3095 PetscCheck(sum == nz, 
PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3096 /* read in column indices and matrix values */ 3097 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3099 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3100 /* store matrix indices and values */ 3101 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3102 PetscCall(PetscFree(rowidxs)); 3103 PetscCall(PetscFree2(colidxs, matvals)); 3104 PetscFunctionReturn(PETSC_SUCCESS); 3105 } 3106 3107 /* Not scalable because of ISAllGather() unless getting all columns. */ 3108 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3109 { 3110 IS iscol_local; 3111 PetscBool isstride; 3112 PetscMPIInt lisstride = 0, gisstride; 3113 3114 PetscFunctionBegin; 3115 /* check if we are grabbing all columns*/ 3116 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3117 3118 if (isstride) { 3119 PetscInt start, len, mstart, mlen; 3120 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3121 PetscCall(ISGetLocalSize(iscol, &len)); 3122 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3123 if (mstart == start && mlen - mstart == len) lisstride = 1; 3124 } 3125 3126 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3127 if (gisstride) { 3128 PetscInt N; 3129 PetscCall(MatGetSize(mat, NULL, &N)); 3130 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3131 PetscCall(ISSetIdentity(iscol_local)); 3132 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3133 } else { 3134 PetscInt cbs; 3135 PetscCall(ISGetBlockSize(iscol, &cbs)); 3136 PetscCall(ISAllGather(iscol, &iscol_local)); 3137 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3138 } 3139 3140 *isseq = iscol_local; 3141 PetscFunctionReturn(PETSC_SUCCESS); 3142 } 3143 3144 /* 3145 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3146 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3147 3148 Input Parameters: 3149 + mat - matrix 3150 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3151 i.e., mat->rstart <= isrow[i] < mat->rend 3152 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3153 i.e., mat->cstart <= iscol[i] < mat->cend 3154 3155 Output Parameters: 3156 + isrow_d - sequential row index set for retrieving mat->A 3157 . iscol_d - sequential column index set for retrieving mat->A 3158 . 
iscol_o - sequential column index set for retrieving mat->B 3159 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3160 */ 3161 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3162 { 3163 Vec x, cmap; 3164 const PetscInt *is_idx; 3165 PetscScalar *xarray, *cmaparray; 3166 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3168 Mat B = a->B; 3169 Vec lvec = a->lvec, lcmap; 3170 PetscInt i, cstart, cend, Bn = B->cmap->N; 3171 MPI_Comm comm; 3172 VecScatter Mvctx = a->Mvctx; 3173 3174 PetscFunctionBegin; 3175 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3176 PetscCall(ISGetLocalSize(iscol, &ncols)); 3177 3178 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3179 PetscCall(MatCreateVecs(mat, &x, NULL)); 3180 PetscCall(VecSet(x, -1.0)); 3181 PetscCall(VecDuplicate(x, &cmap)); 3182 PetscCall(VecSet(cmap, -1.0)); 3183 3184 /* Get start indices */ 3185 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3186 isstart -= ncols; 3187 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3188 3189 PetscCall(ISGetIndices(iscol, &is_idx)); 3190 PetscCall(VecGetArray(x, &xarray)); 3191 PetscCall(VecGetArray(cmap, &cmaparray)); 3192 PetscCall(PetscMalloc1(ncols, &idx)); 3193 for (i = 0; i < ncols; i++) { 3194 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3195 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3196 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3197 } 3198 PetscCall(VecRestoreArray(x, &xarray)); 3199 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3200 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3201 3202 /* Get iscol_d */ 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3204 PetscCall(ISGetBlockSize(iscol, &i)); 3205 PetscCall(ISSetBlockSize(*iscol_d, i)); 3206 3207 /* Get isrow_d */ 3208 PetscCall(ISGetLocalSize(isrow, &m)); 3209 rstart = mat->rmap->rstart; 3210 PetscCall(PetscMalloc1(m, &idx)); 3211 PetscCall(ISGetIndices(isrow, &is_idx)); 3212 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3213 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3214 3215 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3216 PetscCall(ISGetBlockSize(isrow, &i)); 3217 PetscCall(ISSetBlockSize(*isrow_d, i)); 3218 3219 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3220 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3222 3223 PetscCall(VecDuplicate(lvec, &lcmap)); 3224 3225 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3227 3228 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3229 /* off-process column indices */ 3230 count = 0; 3231 PetscCall(PetscMalloc1(Bn, &idx)); 3232 PetscCall(PetscMalloc1(Bn, &cmap1)); 3233 3234 PetscCall(VecGetArray(lvec, &xarray)); 3235 PetscCall(VecGetArray(lcmap, &cmaparray)); 3236 for (i = 0; i < Bn; i++) { 3237 if (PetscRealPart(xarray[i]) > -1.0) { 3238 idx[count] = i; /* local column index in off-diagonal part B */ 3239 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3240 count++; 
3241 } 3242 } 3243 PetscCall(VecRestoreArray(lvec, &xarray)); 3244 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3245 3246 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3247 /* cannot ensure iscol_o has same blocksize as iscol! */ 3248 3249 PetscCall(PetscFree(idx)); 3250 *garray = cmap1; 3251 3252 PetscCall(VecDestroy(&x)); 3253 PetscCall(VecDestroy(&cmap)); 3254 PetscCall(VecDestroy(&lcmap)); 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3260 { 3261 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3262 Mat M = NULL; 3263 MPI_Comm comm; 3264 IS iscol_d, isrow_d, iscol_o; 3265 Mat Asub = NULL, Bsub = NULL; 3266 PetscInt n; 3267 3268 PetscFunctionBegin; 3269 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3270 3271 if (call == MAT_REUSE_MATRIX) { 3272 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3274 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3277 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3280 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3281 3282 /* Update diagonal and off-diagonal portions of submat */ 3283 asub = (Mat_MPIAIJ *)(*submat)->data; 3284 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3285 PetscCall(ISGetLocalSize(iscol_o, &n)); 3286 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3287 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3288 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3289 3290 } else { /* call == MAT_INITIAL_MATRIX) */ 3291 const PetscInt *garray; 3292 PetscInt BsubN; 3293 3294 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3295 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3296 3297 /* Create local submatrices Asub and Bsub */ 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3299 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3300 3301 /* Create submatrix M */ 3302 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3303 3304 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3305 asub = (Mat_MPIAIJ *)M->data; 3306 3307 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3308 n = asub->B->cmap->N; 3309 if (BsubN > n) { 3310 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3311 const PetscInt *idx; 3312 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3313 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3314 3315 PetscCall(PetscMalloc1(n, &idx_new)); 3316 j = 0; 3317 PetscCall(ISGetIndices(iscol_o, &idx)); 3318 for (i = 0; i < n; i++) { 3319 if (j >= BsubN) break; 3320 while (subgarray[i] > garray[j]) j++; 3321 3322 if (subgarray[i] == garray[j]) { 3323 idx_new[i] = idx[j++]; 3324 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3325 } 3326 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3327 3328 PetscCall(ISDestroy(&iscol_o)); 3329 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3330 3331 } else if (BsubN < n) { 3332 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3333 } 3334 3335 PetscCall(PetscFree(garray)); 3336 *submat = M; 3337 3338 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3339 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3340 PetscCall(ISDestroy(&isrow_d)); 3341 3342 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3343 PetscCall(ISDestroy(&iscol_d)); 3344 3345 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3346 PetscCall(ISDestroy(&iscol_o)); 3347 } 3348 PetscFunctionReturn(PETSC_SUCCESS); 3349 } 3350 3351 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3352 { 3353 IS iscol_local = NULL, isrow_d; 3354 PetscInt csize; 3355 PetscInt n, i, j, start, end; 3356 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3357 MPI_Comm comm; 3358 3359 PetscFunctionBegin; 3360 /* If isrow has same processor distribution as mat, 3361 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3362 if (call == MAT_REUSE_MATRIX) { 3363 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3364 if (isrow_d) { 3365 sameRowDist = PETSC_TRUE; 3366 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3367 } else { 3368 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3369 if (iscol_local) { 3370 sameRowDist = PETSC_TRUE; 3371 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3372 } 3373 } 3374 } else { 3375 /* Check if isrow has same processor distribution as mat */ 3376 sameDist[0] = PETSC_FALSE; 3377 
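 /* A rank counts isrow as matching the row distribution of mat when its local piece of isrow is either empty
    or falls entirely inside this rank's ownership range [start,end); the per-rank answers are combined further
    below with a logical-AND reduction over the communicator. */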
PetscCall(ISGetLocalSize(isrow, &n)); 3378 if (!n) { 3379 sameDist[0] = PETSC_TRUE; 3380 } else { 3381 PetscCall(ISGetMinMax(isrow, &i, &j)); 3382 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3383 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3384 } 3385 3386 /* Check if iscol has same processor distribution as mat */ 3387 sameDist[1] = PETSC_FALSE; 3388 PetscCall(ISGetLocalSize(iscol, &n)); 3389 if (!n) { 3390 sameDist[1] = PETSC_TRUE; 3391 } else { 3392 PetscCall(ISGetMinMax(iscol, &i, &j)); 3393 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3394 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3395 } 3396 3397 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3398 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3399 sameRowDist = tsameDist[0]; 3400 } 3401 3402 if (sameRowDist) { 3403 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3404 /* isrow and iscol have same processor distribution as mat */ 3405 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3406 PetscFunctionReturn(PETSC_SUCCESS); 3407 } else { /* sameRowDist */ 3408 /* isrow has same processor distribution as mat */ 3409 if (call == MAT_INITIAL_MATRIX) { 3410 PetscBool sorted; 3411 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3412 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3413 PetscCall(ISGetSize(iscol, &i)); 3414 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3415 3416 PetscCall(ISSorted(iscol_local, &sorted)); 3417 if (sorted) { 3418 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3419 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3420 PetscFunctionReturn(PETSC_SUCCESS); 3421 } 3422 } else { /* call == MAT_REUSE_MATRIX */ 3423 IS iscol_sub; 3424 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3425 if (iscol_sub) { 3426 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3427 PetscFunctionReturn(PETSC_SUCCESS); 3428 } 3429 } 3430 } 3431 } 3432 3433 /* General case: iscol -> iscol_local which has global size of iscol */ 3434 if (call == MAT_REUSE_MATRIX) { 3435 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3436 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3437 } else { 3438 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3439 } 3440 3441 PetscCall(ISGetLocalSize(iscol, &csize)); 3442 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3443 3444 if (call == MAT_INITIAL_MATRIX) { 3445 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3446 PetscCall(ISDestroy(&iscol_local)); 3447 } 3448 PetscFunctionReturn(PETSC_SUCCESS); 3449 } 3450 3451 /*@C 3452 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3453 and "off-diagonal" part of the matrix in CSR format. 3454 3455 Collective 3456 3457 Input Parameters: 3458 + comm - MPI communicator 3459 . A - "diagonal" portion of matrix 3460 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3461 - garray - global index of `B` columns 3462 3463 Output Parameter: 3464 . mat - the matrix, with input `A` as its local diagonal matrix 3465 3466 Level: advanced 3467 3468 Notes: 3469 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3470 3471 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3472 3473 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3474 @*/ 3475 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3476 { 3477 Mat_MPIAIJ *maij; 3478 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3479 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3480 const PetscScalar *oa; 3481 Mat Bnew; 3482 PetscInt m, n, N; 3483 MatType mpi_mat_type; 3484 3485 PetscFunctionBegin; 3486 PetscCall(MatCreate(comm, mat)); 3487 PetscCall(MatGetSize(A, &m, &n)); 3488 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3489 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3490 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3491 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3492 3493 /* Get global columns of mat */ 3494 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3495 3496 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3497 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3498 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3499 PetscCall(MatSetType(*mat, mpi_mat_type)); 3500 3501 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3502 maij = (Mat_MPIAIJ *)(*mat)->data; 3503 3504 (*mat)->preallocated = PETSC_TRUE; 3505 3506 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3507 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3508 3509 /* Set A as diagonal portion of *mat */ 3510 maij->A = A; 3511 3512 nz = oi[m]; 3513 for (i = 0; i < nz; i++) { 3514 col = oj[i]; 3515 oj[i] = garray[col]; 3516 } 3517 3518 /* Set Bnew as off-diagonal portion of *mat */ 3519 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3520 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3521 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3522 bnew = (Mat_SeqAIJ *)Bnew->data; 3523 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3524 maij->B = Bnew; 3525 3526 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3527 3528 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3529 b->free_a = PETSC_FALSE; 3530 b->free_ij = PETSC_FALSE; 3531 PetscCall(MatDestroy(&B)); 3532 3533 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3534 bnew->free_a = PETSC_TRUE; 3535 bnew->free_ij = PETSC_TRUE; 3536 3537 /* condense columns of maij->B */ 3538 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3539 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3540 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3541 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3542 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3543 PetscFunctionReturn(PETSC_SUCCESS); 3544 } 3545 3546 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3547 3548 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3549 { 3550 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3551 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3552 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3553 Mat M, Msub, B = a->B; 3554 MatScalar *aa; 3555 Mat_SeqAIJ *aij; 3556 PetscInt *garray = a->garray, *colsub, Ncols; 3557 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3558 IS iscol_sub, iscmap; 3559 const PetscInt *is_idx, *cmap; 3560 PetscBool allcolumns = PETSC_FALSE; 3561 MPI_Comm comm; 3562 3563 PetscFunctionBegin; 3564 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3565 if (call == MAT_REUSE_MATRIX) { 3566 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3567 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3568 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3569 3570 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3571 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3572 3573 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3574 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3575 3576 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, 
&iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3577 3578 } else { /* call == MAT_INITIAL_MATRIX) */ 3579 PetscBool flg; 3580 3581 PetscCall(ISGetLocalSize(iscol, &n)); 3582 PetscCall(ISGetSize(iscol, &Ncols)); 3583 3584 /* (1) iscol -> nonscalable iscol_local */ 3585 /* Check for special case: each processor gets entire matrix columns */ 3586 PetscCall(ISIdentity(iscol_local, &flg)); 3587 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3588 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3589 if (allcolumns) { 3590 iscol_sub = iscol_local; 3591 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3592 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3593 3594 } else { 3595 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3596 PetscInt *idx, *cmap1, k; 3597 PetscCall(PetscMalloc1(Ncols, &idx)); 3598 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3599 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3600 count = 0; 3601 k = 0; 3602 for (i = 0; i < Ncols; i++) { 3603 j = is_idx[i]; 3604 if (j >= cstart && j < cend) { 3605 /* diagonal part of mat */ 3606 idx[count] = j; 3607 cmap1[count++] = i; /* column index in submat */ 3608 } else if (Bn) { 3609 /* off-diagonal part of mat */ 3610 if (j == garray[k]) { 3611 idx[count] = j; 3612 cmap1[count++] = i; /* column index in submat */ 3613 } else if (j > garray[k]) { 3614 while (j > garray[k] && k < Bn - 1) k++; 3615 if (j == garray[k]) { 3616 idx[count] = j; 3617 cmap1[count++] = i; /* column index in submat */ 3618 } 3619 } 3620 } 3621 } 3622 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3623 3624 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3625 PetscCall(ISGetBlockSize(iscol, &cbs)); 3626 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3627 3628 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3629 } 3630 3631 /* (3) Create sequential Msub */ 3632 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3633 } 3634 3635 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3636 aij = (Mat_SeqAIJ *)(Msub)->data; 3637 ii = aij->i; 3638 PetscCall(ISGetIndices(iscmap, &cmap)); 3639 3640 /* 3641 m - number of local rows 3642 Ncols - number of columns (same on all processors) 3643 rstart - first row in new global matrix generated 3644 */ 3645 PetscCall(MatGetSize(Msub, &m, NULL)); 3646 3647 if (call == MAT_INITIAL_MATRIX) { 3648 /* (4) Create parallel newmat */ 3649 PetscMPIInt rank, size; 3650 PetscInt csize; 3651 3652 PetscCallMPI(MPI_Comm_size(comm, &size)); 3653 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3654 3655 /* 3656 Determine the number of non-zeros in the diagonal and off-diagonal 3657 portions of the matrix in order to do correct preallocation 3658 */ 3659 3660 /* first get start and end of "diagonal" columns */ 3661 PetscCall(ISGetLocalSize(iscol, &csize)); 3662 if (csize == PETSC_DECIDE) { 3663 PetscCall(ISGetSize(isrow, &mglobal)); 3664 if (mglobal == Ncols) { /* square matrix */ 3665 nlocal = m; 3666 } else { 3667 nlocal = Ncols / size + ((Ncols % size) > rank); 3668 } 3669 } else { 3670 nlocal = csize; 3671 } 3672 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3673 rstart = rend - nlocal; 3674 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column 
sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3675 3676 /* next, compute all the lengths */ 3677 jj = aij->j; 3678 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3679 olens = dlens + m; 3680 for (i = 0; i < m; i++) { 3681 jend = ii[i + 1] - ii[i]; 3682 olen = 0; 3683 dlen = 0; 3684 for (j = 0; j < jend; j++) { 3685 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3686 else dlen++; 3687 jj++; 3688 } 3689 olens[i] = olen; 3690 dlens[i] = dlen; 3691 } 3692 3693 PetscCall(ISGetBlockSize(isrow, &bs)); 3694 PetscCall(ISGetBlockSize(iscol, &cbs)); 3695 3696 PetscCall(MatCreate(comm, &M)); 3697 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3698 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3699 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3700 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3701 PetscCall(PetscFree(dlens)); 3702 3703 } else { /* call == MAT_REUSE_MATRIX */ 3704 M = *newmat; 3705 PetscCall(MatGetLocalSize(M, &i, NULL)); 3706 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3707 PetscCall(MatZeroEntries(M)); 3708 /* 3709 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3710 rather than the slower MatSetValues(). 3711 */ 3712 M->was_assembled = PETSC_TRUE; 3713 M->assembled = PETSC_FALSE; 3714 } 3715 3716 /* (5) Set values of Msub to *newmat */ 3717 PetscCall(PetscMalloc1(count, &colsub)); 3718 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3719 3720 jj = aij->j; 3721 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3722 for (i = 0; i < m; i++) { 3723 row = rstart + i; 3724 nz = ii[i + 1] - ii[i]; 3725 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3726 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3727 jj += nz; 3728 aa += nz; 3729 } 3730 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3731 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3732 3733 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3734 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3735 3736 PetscCall(PetscFree(colsub)); 3737 3738 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3739 if (call == MAT_INITIAL_MATRIX) { 3740 *newmat = M; 3741 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3742 PetscCall(MatDestroy(&Msub)); 3743 3744 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3745 PetscCall(ISDestroy(&iscol_sub)); 3746 3747 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3748 PetscCall(ISDestroy(&iscmap)); 3749 3750 if (iscol_local) { 3751 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3752 PetscCall(ISDestroy(&iscol_local)); 3753 } 3754 } 3755 PetscFunctionReturn(PETSC_SUCCESS); 3756 } 3757 3758 /* 3759 Not great since it makes two copies of the submatrix, first an SeqAIJ 3760 in local and then by concatenating the local matrices the end result. 3761 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3762 3763 This requires a sequential iscol with all indices. 
3764 */ 3765 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3766 { 3767 PetscMPIInt rank, size; 3768 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3769 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3770 Mat M, Mreuse; 3771 MatScalar *aa, *vwork; 3772 MPI_Comm comm; 3773 Mat_SeqAIJ *aij; 3774 PetscBool colflag, allcolumns = PETSC_FALSE; 3775 3776 PetscFunctionBegin; 3777 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3778 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3779 PetscCallMPI(MPI_Comm_size(comm, &size)); 3780 3781 /* Check for special case: each processor gets entire matrix columns */ 3782 PetscCall(ISIdentity(iscol, &colflag)); 3783 PetscCall(ISGetLocalSize(iscol, &n)); 3784 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3785 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3786 3787 if (call == MAT_REUSE_MATRIX) { 3788 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3789 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3790 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3791 } else { 3792 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3793 } 3794 3795 /* 3796 m - number of local rows 3797 n - number of columns (same on all processors) 3798 rstart - first row in new global matrix generated 3799 */ 3800 PetscCall(MatGetSize(Mreuse, &m, &n)); 3801 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3802 if (call == MAT_INITIAL_MATRIX) { 3803 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3804 ii = aij->i; 3805 jj = aij->j; 3806 3807 /* 3808 Determine the number of non-zeros in the diagonal and off-diagonal 3809 portions of the matrix in order to do correct preallocation 3810 */ 3811 3812 /* first get start and end of "diagonal" columns */ 3813 if (csize == PETSC_DECIDE) { 3814 PetscCall(ISGetSize(isrow, &mglobal)); 3815 if (mglobal == n) { /* square matrix */ 3816 nlocal = m; 3817 } else { 3818 nlocal = n / size + ((n % size) > rank); 3819 } 3820 } else { 3821 nlocal = csize; 3822 } 3823 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3824 rstart = rend - nlocal; 3825 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3826 3827 /* next, compute all the lengths */ 3828 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3829 olens = dlens + m; 3830 for (i = 0; i < m; i++) { 3831 jend = ii[i + 1] - ii[i]; 3832 olen = 0; 3833 dlen = 0; 3834 for (j = 0; j < jend; j++) { 3835 if (*jj < rstart || *jj >= rend) olen++; 3836 else dlen++; 3837 jj++; 3838 } 3839 olens[i] = olen; 3840 dlens[i] = dlen; 3841 } 3842 PetscCall(MatCreate(comm, &M)); 3843 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3844 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3845 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3846 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3847 PetscCall(PetscFree(dlens)); 3848 } else { 3849 PetscInt ml, nl; 3850 3851 M = *newmat; 3852 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3853 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3854 PetscCall(MatZeroEntries(M)); 3855 /* 3856 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3857 rather than the slower MatSetValues(). 3858 */ 3859 M->was_assembled = PETSC_TRUE; 3860 M->assembled = PETSC_FALSE; 3861 } 3862 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3863 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3864 ii = aij->i; 3865 jj = aij->j; 3866 3867 /* trigger copy to CPU if needed */ 3868 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3869 for (i = 0; i < m; i++) { 3870 row = rstart + i; 3871 nz = ii[i + 1] - ii[i]; 3872 cwork = jj; 3873 jj += nz; 3874 vwork = aa; 3875 aa += nz; 3876 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3877 } 3878 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3879 3880 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3881 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3882 *newmat = M; 3883 3884 /* save submatrix used in processor for next request */ 3885 if (call == MAT_INITIAL_MATRIX) { 3886 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3887 PetscCall(MatDestroy(&Mreuse)); 3888 } 3889 PetscFunctionReturn(PETSC_SUCCESS); 3890 } 3891 3892 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3893 { 3894 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3895 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3896 const PetscInt *JJ; 3897 PetscBool nooffprocentries; 3898 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3899 3900 PetscFunctionBegin; 3901 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3902 3903 PetscCall(PetscLayoutSetUp(B->rmap)); 3904 PetscCall(PetscLayoutSetUp(B->cmap)); 3905 m = B->rmap->n; 3906 cstart = B->cmap->rstart; 3907 cend = B->cmap->rend; 3908 rstart = B->rmap->rstart; 3909 3910 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3911 3912 if (PetscDefined(USE_DEBUG)) { 3913 for (i = 0; i < m; i++) { 3914 nnz = Ii[i + 1] - Ii[i]; 3915 JJ = J + Ii[i]; 3916 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3917 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3918 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3919 } 3920 } 3921 3922 for (i = 0; i < m; i++) { 3923 nnz = Ii[i + 1] - Ii[i]; 3924 JJ = J + Ii[i]; 3925 nnz_max = PetscMax(nnz_max, nnz); 3926 d = 0; 3927 for (j = 0; j < nnz; j++) { 3928 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3929 } 3930 d_nnz[i] = d; 3931 o_nnz[i] = nnz - d; 3932 } 3933 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3934 PetscCall(PetscFree2(d_nnz, o_nnz)); 3935 3936 for (i = 0; i < m; i++) { 3937 ii = i + rstart; 3938 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3939 } 3940 nooffprocentries = B->nooffprocentries; 3941 B->nooffprocentries = PETSC_TRUE; 3942 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3943 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3944 B->nooffprocentries = nooffprocentries; 3945 3946 /* count number of entries below block diagonal */ 3947 PetscCall(PetscFree(Aij->ld)); 3948 PetscCall(PetscCalloc1(m, &ld)); 3949 Aij->ld = ld; 3950 for (i = 0; i < m; i++) { 3951 nnz = Ii[i + 1] - Ii[i]; 3952 j = 0; 3953 while (j < nnz && J[j] < cstart) j++; 3954 ld[i] = j; 3955 J += nnz; 3956 } 3957 3958 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3959 PetscFunctionReturn(PETSC_SUCCESS); 3960 } 3961 3962 /*@ 3963 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3964 (the default parallel PETSc format). 3965 3966 Collective 3967 3968 Input Parameters: 3969 + B - the matrix 3970 . i - the indices into j for the start of each local row (starts with zero) 3971 . j - the column indices for each local row (starts with zero) 3972 - v - optional values in the matrix 3973 3974 Level: developer 3975 3976 Notes: 3977 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3978 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3979 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3980 3981 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3982 3983 The format which is used for the sparse matrix input, is equivalent to a 3984 row-major ordering.. i.e for the following matrix, the input data expected is 3985 as shown 3986 3987 .vb 3988 1 0 0 3989 2 0 3 P0 3990 ------- 3991 4 5 6 P1 3992 3993 Process0 [P0] rows_owned=[0,1] 3994 i = {0,1,3} [size = nrow+1 = 2+1] 3995 j = {0,0,2} [size = 3] 3996 v = {1,2,3} [size = 3] 3997 3998 Process1 [P1] rows_owned=[2] 3999 i = {0,3} [size = nrow+1 = 1+1] 4000 j = {0,1,2} [size = 3] 4001 v = {4,5,6} [size = 3] 4002 .ve 4003 4004 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 4005 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 4006 @*/ 4007 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4008 { 4009 PetscFunctionBegin; 4010 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4011 PetscFunctionReturn(PETSC_SUCCESS); 4012 } 4013 4014 /*@C 4015 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4016 (the default parallel PETSc format). For good matrix assembly performance 4017 the user should preallocate the matrix storage by setting the parameters 4018 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4019 4020 Collective 4021 4022 Input Parameters: 4023 + B - the matrix 4024 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4025 (same value is used for all local rows) 4026 . d_nnz - array containing the number of nonzeros in the various rows of the 4027 DIAGONAL portion of the local submatrix (possibly different for each row) 4028 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 
4029 The size of this array is equal to the number of local rows, i.e 'm'. 4030 For matrices that will be factored, you must leave room for (and set) 4031 the diagonal entry even if it is zero. 4032 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4033 submatrix (same value is used for all local rows). 4034 - o_nnz - array containing the number of nonzeros in the various rows of the 4035 OFF-DIAGONAL portion of the local submatrix (possibly different for 4036 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4037 structure. The size of this array is equal to the number 4038 of local rows, i.e 'm'. 4039 4040 Usage: 4041 Consider the following 8x8 matrix with 34 non-zero values, that is 4042 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4043 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4044 as follows 4045 4046 .vb 4047 1 2 0 | 0 3 0 | 0 4 4048 Proc0 0 5 6 | 7 0 0 | 8 0 4049 9 0 10 | 11 0 0 | 12 0 4050 ------------------------------------- 4051 13 0 14 | 15 16 17 | 0 0 4052 Proc1 0 18 0 | 19 20 21 | 0 0 4053 0 0 0 | 22 23 0 | 24 0 4054 ------------------------------------- 4055 Proc2 25 26 27 | 0 0 28 | 29 0 4056 30 0 0 | 31 32 33 | 0 34 4057 .ve 4058 4059 This can be represented as a collection of submatrices as 4060 .vb 4061 A B C 4062 D E F 4063 G H I 4064 .ve 4065 4066 Where the submatrices A,B,C are owned by proc0, D,E,F are 4067 owned by proc1, G,H,I are owned by proc2. 4068 4069 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4070 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4071 The 'M','N' parameters are 8,8, and have the same values on all procs. 4072 4073 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4074 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4075 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4076 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4077 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4078 matrix, and [DF] as another `MATSEQAIJ` matrix. 4079 4080 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4081 allocated for every row of the local diagonal submatrix, and `o_nz` 4082 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4083 One way to choose `d_nz` and `o_nz` is to use the maximum nonzeros per local 4084 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4085 In this case, the values of `d_nz`, `o_nz` are 4086 .vb 4087 proc0 d_nz = 2, o_nz = 2 4088 proc1 d_nz = 3, o_nz = 2 4089 proc2 d_nz = 1, o_nz = 4 4090 .ve 4091 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4092 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4093 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4094 34 values. 4095 4096 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4097 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4098 In the above case the values for `d_nnz`, `o_nnz` are 4099 .vb 4100 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4101 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4102 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4103 .ve 4104 Here the space allocated is the sum of all the above values, i.e. 34, and 4105 hence pre-allocation is perfect.
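   As a concrete illustration (a minimal sketch only, assuming `A` has been declared as a `Mat`; it shows the arrays that proc1 in the example above would pass for its own three rows), the preallocation above could be requested with
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A,3,3,8,8);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   Each rank makes the same calls with its own local row count and nnz arrays; the scalar `d_nz`/`o_nz` arguments are ignored here because the arrays are provided.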
4106 4107 Level: intermediate 4108 4109 Notes: 4110 If the *_nnz parameter is given then the *_nz parameter is ignored 4111 4112 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4113 storage. The stored row and column indices begin with zero. 4114 See [Sparse Matrices](sec_matsparse) for details. 4115 4116 The parallel matrix is partitioned such that the first m0 rows belong to 4117 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4118 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4119 4120 The DIAGONAL portion of the local submatrix of a processor can be defined 4121 as the submatrix which is obtained by extraction the part corresponding to 4122 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4123 first row that belongs to the processor, r2 is the last row belonging to 4124 the this processor, and c1-c2 is range of indices of the local part of a 4125 vector suitable for applying the matrix to. This is an mxn matrix. In the 4126 common case of a square matrix, the row and column ranges are the same and 4127 the DIAGONAL part is also square. The remaining portion of the local 4128 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4129 4130 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4131 4132 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4133 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4134 You can also run with the option `-info` and look for messages with the string 4135 malloc in them to see if additional memory allocation was needed. 4136 4137 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4138 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()` 4139 @*/ 4140 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4141 { 4142 PetscFunctionBegin; 4143 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4144 PetscValidType(B, 1); 4145 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4146 PetscFunctionReturn(PETSC_SUCCESS); 4147 } 4148 4149 /*@ 4150 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4151 CSR format for the local rows. 4152 4153 Collective 4154 4155 Input Parameters: 4156 + comm - MPI communicator 4157 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4158 . n - This value should be the same as the local size used in creating the 4159 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4160 calculated if N is given) For square matrices n is almost always m. 4161 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4162 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4163 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4164 . j - column indices 4165 - a - optional matrix values 4166 4167 Output Parameter: 4168 . 
mat - the matrix 4169 4170 Level: intermediate 4171 4172 Notes: 4173 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4174 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4175 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4176 4177 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4178 4179 The format which is used for the sparse matrix input, is equivalent to a 4180 row-major ordering.. i.e for the following matrix, the input data expected is 4181 as shown 4182 4183 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4184 .vb 4185 1 0 0 4186 2 0 3 P0 4187 ------- 4188 4 5 6 P1 4189 4190 Process0 [P0] rows_owned=[0,1] 4191 i = {0,1,3} [size = nrow+1 = 2+1] 4192 j = {0,0,2} [size = 3] 4193 v = {1,2,3} [size = 3] 4194 4195 Process1 [P1] rows_owned=[2] 4196 i = {0,3} [size = nrow+1 = 1+1] 4197 j = {0,1,2} [size = 3] 4198 v = {4,5,6} [size = 3] 4199 .ve 4200 4201 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4202 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4203 @*/ 4204 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4205 { 4206 PetscFunctionBegin; 4207 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4208 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4209 PetscCall(MatCreate(comm, mat)); 4210 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4211 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4212 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4213 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4214 PetscFunctionReturn(PETSC_SUCCESS); 4215 } 4216 4217 /*@ 4218 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4219 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4220 from `MatCreateMPIAIJWithArrays()` 4221 4222 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4223 4224 Collective 4225 4226 Input Parameters: 4227 + mat - the matrix 4228 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4229 . n - This value should be the same as the local size used in creating the 4230 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4231 calculated if N is given) For square matrices n is almost always m. 4232 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4233 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4234 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4235 . 
J - column indices 4236 - v - matrix values 4237 4238 Level: deprecated 4239 4240 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4241 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4242 @*/ 4243 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4244 { 4245 PetscInt nnz, i; 4246 PetscBool nooffprocentries; 4247 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4248 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4249 PetscScalar *ad, *ao; 4250 PetscInt ldi, Iii, md; 4251 const PetscInt *Adi = Ad->i; 4252 PetscInt *ld = Aij->ld; 4253 4254 PetscFunctionBegin; 4255 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4256 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4257 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4258 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4259 4260 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4261 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4262 4263 for (i = 0; i < m; i++) { 4264 nnz = Ii[i + 1] - Ii[i]; 4265 Iii = Ii[i]; 4266 ldi = ld[i]; 4267 md = Adi[i + 1] - Adi[i]; 4268 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4269 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4270 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4271 ad += md; 4272 ao += nnz - md; 4273 } 4274 nooffprocentries = mat->nooffprocentries; 4275 mat->nooffprocentries = PETSC_TRUE; 4276 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4277 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4278 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4279 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4280 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4281 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4282 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4283 mat->nooffprocentries = nooffprocentries; 4284 PetscFunctionReturn(PETSC_SUCCESS); 4285 } 4286 4287 /*@ 4288 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4289 4290 Collective 4291 4292 Input Parameters: 4293 + mat - the matrix 4294 - v - matrix values, stored by row 4295 4296 Level: intermediate 4297 4298 Note: 4299 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4300 4301 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4302 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4303 @*/ 4304 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4305 { 4306 PetscInt nnz, i, m; 4307 PetscBool nooffprocentries; 4308 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4309 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4310 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4311 PetscScalar *ad, *ao; 4312 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 
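  /* ld[i], filled in by MatMPIAIJSetPreallocationCSR_MPIAIJ(), counts the entries of local row i whose global
     column lies before this rank's diagonal block, so each CSR row of v is copied in the loop below as
     [off-diagonal left | diagonal block | off-diagonal right] into ao, ad, and ao again */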
4313 PetscInt ldi, Iii, md; 4314 PetscInt *ld = Aij->ld; 4315 4316 PetscFunctionBegin; 4317 m = mat->rmap->n; 4318 4319 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4320 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4321 Iii = 0; 4322 for (i = 0; i < m; i++) { 4323 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4324 ldi = ld[i]; 4325 md = Adi[i + 1] - Adi[i]; 4326 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4327 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4328 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4329 ad += md; 4330 ao += nnz - md; 4331 Iii += nnz; 4332 } 4333 nooffprocentries = mat->nooffprocentries; 4334 mat->nooffprocentries = PETSC_TRUE; 4335 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4336 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4337 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4338 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4339 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4340 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4341 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4342 mat->nooffprocentries = nooffprocentries; 4343 PetscFunctionReturn(PETSC_SUCCESS); 4344 } 4345 4346 /*@C 4347 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4348 (the default parallel PETSc format). For good matrix assembly performance 4349 the user should preallocate the matrix storage by setting the parameters 4350 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4351 4352 Collective 4353 4354 Input Parameters: 4355 + comm - MPI communicator 4356 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4357 This value should be the same as the local size used in creating the 4358 y vector for the matrix-vector product y = Ax. 4359 . n - This value should be the same as the local size used in creating the 4360 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4361 calculated if N is given) For square matrices n is almost always m. 4362 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4363 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4364 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4365 (same value is used for all local rows) 4366 . d_nnz - array containing the number of nonzeros in the various rows of the 4367 DIAGONAL portion of the local submatrix (possibly different for each row) 4368 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4369 The size of this array is equal to the number of local rows, i.e 'm'. 4370 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4371 submatrix (same value is used for all local rows). 4372 - o_nnz - array containing the number of nonzeros in the various rows of the 4373 OFF-DIAGONAL portion of the local submatrix (possibly different for 4374 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4375 structure. The size of this array is equal to the number 4376 of local rows, i.e 'm'. 4377 4378 Output Parameter: 4379 . A - the matrix 4380 4381 Options Database Keys: 4382 + -mat_no_inode - Do not use inodes 4383 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4384 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4385 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 
   Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

   Level: intermediate

   Notes:
   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
   storage requirements for this matrix.

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are given by the input parameter `m` on each process. That is, each
   process stores values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are given by the input parameter `n` on each process.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. That is, the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   When calling this routine with a single process communicator, a matrix of
   type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Usage:
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Here the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.

   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
   allocated for every row of the local diagonal submatrix, and `o_nz`
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros
   per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for `d_nnz`,`o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
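
   Example Usage:
   A minimal sketch of creating the matrix and inserting values; the per-rank variables
   m, n, d_nnz, o_nnz, rstart, rend, ncols, cols, and vals below are illustrative
   placeholders, not values prescribed by this routine:
.vb
   Mat A;
   MatCreateAIJ(PETSC_COMM_WORLD, m, n, PETSC_DETERMINE, PETSC_DETERMINE, 0, d_nnz, 0, o_nnz, &A);
   for (i = rstart; i < rend; i++) {
     /* ncols, cols, and vals describe the nonzeros of global row i owned by this rank */
     MatSetValues(A, 1, &i, ncols, cols, vals, INSERT_VALUES);
   }
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve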
.seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix

   Synopsis:
   MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)

   Not Collective

   Input Parameter:
.  A - the `MATMPIAIJ` matrix

   Output Parameters:
+  Ad - the diagonal portion of the matrix
.  Ao - the off-diagonal portion of the matrix
.  colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
-  ierr - error code

   Level: advanced

   Note:
   Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`

.seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
M*/

/*MC
   MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`

   Synopsis:
   MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)

   Not Collective

   Input Parameters:
+  A - the `MATMPIAIJ` matrix
.  Ad - the diagonal portion of the matrix
.  Ao - the off-diagonal portion of the matrix
.  colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
-  ierr - error code

   Level: advanced

.seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
M*/

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not Collective

   Input Parameter:
.  A - The `MATMPIAIJ` matrix

   Output Parameters:
+  Ad - The local diagonal block as a `MATSEQAIJ` matrix
.  Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
-  colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

   Level: intermediate

   Note:
   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
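
   Example Usage:
   A sketch of reading a row of `Ao` and translating its local column indices to global
   indices with `colmap`; the use of row 0 is just an illustration:
.vb
   Mat             Ad, Ao;
   const PetscInt *colmap, *cols;
   PetscInt        ncols, j, gcol;

   MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
   MatGetRow(Ao, 0, &ncols, &cols, NULL);
   for (j = 0; j < ncols; j++) gcol = colmap[cols[j]]; /* global column of each entry in local row 0 of Ao */
   MatRestoreRow(Ao, 0, &ncols, &cols, NULL);
.ve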
4601 4602 Fortran Note: 4603 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4604 4605 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4606 @*/ 4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4608 { 4609 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4610 PetscBool flg; 4611 4612 PetscFunctionBegin; 4613 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4614 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4615 if (Ad) *Ad = a->A; 4616 if (Ao) *Ao = a->B; 4617 if (colmap) *colmap = a->garray; 4618 PetscFunctionReturn(PETSC_SUCCESS); 4619 } 4620 4621 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4622 { 4623 PetscInt m, N, i, rstart, nnz, Ii; 4624 PetscInt *indx; 4625 PetscScalar *values; 4626 MatType rootType; 4627 4628 PetscFunctionBegin; 4629 PetscCall(MatGetSize(inmat, &m, &N)); 4630 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4631 PetscInt *dnz, *onz, sum, bs, cbs; 4632 4633 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4634 /* Check sum(n) = N */ 4635 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4636 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4637 4638 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4639 rstart -= m; 4640 4641 MatPreallocateBegin(comm, m, n, dnz, onz); 4642 for (i = 0; i < m; i++) { 4643 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4644 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4645 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4646 } 4647 4648 PetscCall(MatCreate(comm, outmat)); 4649 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4650 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4651 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4652 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4653 PetscCall(MatSetType(*outmat, rootType)); 4654 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4655 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4656 MatPreallocateEnd(dnz, onz); 4657 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4658 } 4659 4660 /* numeric phase */ 4661 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4662 for (i = 0; i < m; i++) { 4663 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4664 Ii = i + rstart; 4665 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4666 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4667 } 4668 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4669 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4670 PetscFunctionReturn(PETSC_SUCCESS); 4671 } 4672 4673 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4674 { 4675 PetscMPIInt rank; 4676 PetscInt m, N, i, rstart, nnz; 4677 size_t len; 4678 const PetscInt *indx; 4679 PetscViewer out; 4680 char *name; 4681 Mat B; 4682 const PetscScalar *values; 4683 4684 PetscFunctionBegin; 4685 PetscCall(MatGetLocalSize(A, &m, NULL)); 4686 PetscCall(MatGetSize(A, NULL, &N)); 4687 /* Should this be the type of 
the diagonal block of A? */ 4688 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4689 PetscCall(MatSetSizes(B, m, N, m, N)); 4690 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4691 PetscCall(MatSetType(B, MATSEQAIJ)); 4692 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4693 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4694 for (i = 0; i < m; i++) { 4695 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4696 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4697 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4698 } 4699 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4700 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4701 4702 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4703 PetscCall(PetscStrlen(outfile, &len)); 4704 PetscCall(PetscMalloc1(len + 6, &name)); 4705 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4706 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4707 PetscCall(PetscFree(name)); 4708 PetscCall(MatView(B, out)); 4709 PetscCall(PetscViewerDestroy(&out)); 4710 PetscCall(MatDestroy(&B)); 4711 PetscFunctionReturn(PETSC_SUCCESS); 4712 } 4713 4714 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4715 { 4716 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4717 4718 PetscFunctionBegin; 4719 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4720 PetscCall(PetscFree(merge->id_r)); 4721 PetscCall(PetscFree(merge->len_s)); 4722 PetscCall(PetscFree(merge->len_r)); 4723 PetscCall(PetscFree(merge->bi)); 4724 PetscCall(PetscFree(merge->bj)); 4725 PetscCall(PetscFree(merge->buf_ri[0])); 4726 PetscCall(PetscFree(merge->buf_ri)); 4727 PetscCall(PetscFree(merge->buf_rj[0])); 4728 PetscCall(PetscFree(merge->buf_rj)); 4729 PetscCall(PetscFree(merge->coi)); 4730 PetscCall(PetscFree(merge->coj)); 4731 PetscCall(PetscFree(merge->owners_co)); 4732 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4733 PetscCall(PetscFree(merge)); 4734 PetscFunctionReturn(PETSC_SUCCESS); 4735 } 4736 4737 #include <../src/mat/utils/freespace.h> 4738 #include <petscbt.h> 4739 4740 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4741 { 4742 MPI_Comm comm; 4743 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4744 PetscMPIInt size, rank, taga, *len_s; 4745 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4746 PetscInt proc, m; 4747 PetscInt **buf_ri, **buf_rj; 4748 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4749 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4750 MPI_Request *s_waits, *r_waits; 4751 MPI_Status *status; 4752 const MatScalar *aa, *a_a; 4753 MatScalar **abuf_r, *ba_i; 4754 Mat_Merge_SeqsToMPI *merge; 4755 PetscContainer container; 4756 4757 PetscFunctionBegin; 4758 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4759 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4760 4761 PetscCallMPI(MPI_Comm_size(comm, &size)); 4762 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4763 4764 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4765 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4766 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4767 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4768 aa = a_a; 4769 4770 bi = merge->bi; 4771 bj = merge->bj; 4772 buf_ri = merge->buf_ri; 4773 buf_rj = merge->buf_rj; 4774 4775 PetscCall(PetscMalloc1(size, &status)); 
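  /* The ij-structure of the result and the communication pattern were computed by the
     symbolic phase and cached in 'merge'; the numeric phase below only ships values:
     each rank sends the value segments of the seqmat rows owned by other ranks, then
     sums its local values and the received values row by row into mpimat. */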
4776 owners = merge->rowmap->range; 4777 len_s = merge->len_s; 4778 4779 /* send and recv matrix values */ 4780 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4781 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4782 4783 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4784 for (proc = 0, k = 0; proc < size; proc++) { 4785 if (!len_s[proc]) continue; 4786 i = owners[proc]; 4787 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4788 k++; 4789 } 4790 4791 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4792 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4793 PetscCall(PetscFree(status)); 4794 4795 PetscCall(PetscFree(s_waits)); 4796 PetscCall(PetscFree(r_waits)); 4797 4798 /* insert mat values of mpimat */ 4799 PetscCall(PetscMalloc1(N, &ba_i)); 4800 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4801 4802 for (k = 0; k < merge->nrecv; k++) { 4803 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4804 nrows = *(buf_ri_k[k]); 4805 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4806 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4807 } 4808 4809 /* set values of ba */ 4810 m = merge->rowmap->n; 4811 for (i = 0; i < m; i++) { 4812 arow = owners[rank] + i; 4813 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4814 bnzi = bi[i + 1] - bi[i]; 4815 PetscCall(PetscArrayzero(ba_i, bnzi)); 4816 4817 /* add local non-zero vals of this proc's seqmat into ba */ 4818 anzi = ai[arow + 1] - ai[arow]; 4819 aj = a->j + ai[arow]; 4820 aa = a_a + ai[arow]; 4821 nextaj = 0; 4822 for (j = 0; nextaj < anzi; j++) { 4823 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4824 ba_i[j] += aa[nextaj++]; 4825 } 4826 } 4827 4828 /* add received vals into ba */ 4829 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4830 /* i-th row */ 4831 if (i == *nextrow[k]) { 4832 anzi = *(nextai[k] + 1) - *nextai[k]; 4833 aj = buf_rj[k] + *(nextai[k]); 4834 aa = abuf_r[k] + *(nextai[k]); 4835 nextaj = 0; 4836 for (j = 0; nextaj < anzi; j++) { 4837 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4838 ba_i[j] += aa[nextaj++]; 4839 } 4840 } 4841 nextrow[k]++; 4842 nextai[k]++; 4843 } 4844 } 4845 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4846 } 4847 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4848 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4849 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4850 4851 PetscCall(PetscFree(abuf_r[0])); 4852 PetscCall(PetscFree(abuf_r)); 4853 PetscCall(PetscFree(ba_i)); 4854 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4855 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4856 PetscFunctionReturn(PETSC_SUCCESS); 4857 } 4858 4859 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4860 { 4861 Mat B_mpi; 4862 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4863 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4864 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4865 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4866 PetscInt len, proc, *dnz, *onz, bs, cbs; 4867 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4868 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, 
**nextrow, **nextai; 4869 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4870 MPI_Status *status; 4871 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4872 PetscBT lnkbt; 4873 Mat_Merge_SeqsToMPI *merge; 4874 PetscContainer container; 4875 4876 PetscFunctionBegin; 4877 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4878 4879 /* make sure it is a PETSc comm */ 4880 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4881 PetscCallMPI(MPI_Comm_size(comm, &size)); 4882 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4883 4884 PetscCall(PetscNew(&merge)); 4885 PetscCall(PetscMalloc1(size, &status)); 4886 4887 /* determine row ownership */ 4888 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4889 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4890 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4891 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4892 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4893 PetscCall(PetscMalloc1(size, &len_si)); 4894 PetscCall(PetscMalloc1(size, &merge->len_s)); 4895 4896 m = merge->rowmap->n; 4897 owners = merge->rowmap->range; 4898 4899 /* determine the number of messages to send, their lengths */ 4900 len_s = merge->len_s; 4901 4902 len = 0; /* length of buf_si[] */ 4903 merge->nsend = 0; 4904 for (proc = 0; proc < size; proc++) { 4905 len_si[proc] = 0; 4906 if (proc == rank) { 4907 len_s[proc] = 0; 4908 } else { 4909 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4910 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4911 } 4912 if (len_s[proc]) { 4913 merge->nsend++; 4914 nrows = 0; 4915 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4916 if (ai[i + 1] > ai[i]) nrows++; 4917 } 4918 len_si[proc] = 2 * (nrows + 1); 4919 len += len_si[proc]; 4920 } 4921 } 4922 4923 /* determine the number and length of messages to receive for ij-structure */ 4924 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4925 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4926 4927 /* post the Irecv of j-structure */ 4928 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4929 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4930 4931 /* post the Isend of j-structure */ 4932 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4933 4934 for (proc = 0, k = 0; proc < size; proc++) { 4935 if (!len_s[proc]) continue; 4936 i = owners[proc]; 4937 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4938 k++; 4939 } 4940 4941 /* receives and sends of j-structure are complete */ 4942 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4943 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4944 4945 /* send and recv i-structure */ 4946 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4947 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4948 4949 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4950 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4951 for (proc = 0, k = 0; proc < size; proc++) { 4952 if (!len_s[proc]) continue; 4953 /* form outgoing message for i-structure: 4954 buf_si[0]: nrows to be sent 4955 [1:nrows]: row index (global) 4956 [nrows+1:2*nrows+1]: i-structure index 4957 */ 4958 nrows = len_si[proc] / 2 - 1; 4959 buf_si_i = buf_si + nrows + 1; 4960 buf_si[0] = nrows; 
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
   matrices from each processor

   Collective

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix
.  m - number of local rows (or `PETSC_DECIDE`)
.  n - number of local columns (or `PETSC_DECIDE`)
-  scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Note:
   The dimensions of the sequential matrix in each processor MUST be the same.
   The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
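
   Example Usage:
   A sketch of the symbolic/numeric reuse pattern; `seqmat` is assumed to be an assembled
   `MATSEQAIJ` matrix of identical dimensions on every rank:
.vb
   Mat C;
   MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C);
   /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
   MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C);
.ve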
.seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()`
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) {
    PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
    } else {
      PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
  if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained
   with `MatGetSize()`

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   In other words, this routine combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.

   Destroy the matrix with `MatDestroy()`

.seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
   with `MatGetSize()`

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   In other words, this routine combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.

   When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`.
   If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called.
   This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
   modify the values of the returned `A_loc`.
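
   Example Usage:
   A sketch of the create-then-reuse pattern described above (illustrative only):
.vb
   Mat A_loc;
   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
   /* ... the values of A change, its nonzero pattern stays fixed ... */
   MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
   MatDestroy(&A_loc);
.ve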
5203 5204 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5205 @*/ 5206 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5207 { 5208 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5209 Mat_SeqAIJ *mat, *a, *b; 5210 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5211 const PetscScalar *aa, *ba, *aav, *bav; 5212 PetscScalar *ca, *cam; 5213 PetscMPIInt size; 5214 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5215 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5216 PetscBool match; 5217 5218 PetscFunctionBegin; 5219 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5220 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5221 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5222 if (size == 1) { 5223 if (scall == MAT_INITIAL_MATRIX) { 5224 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5225 *A_loc = mpimat->A; 5226 } else if (scall == MAT_REUSE_MATRIX) { 5227 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5228 } 5229 PetscFunctionReturn(PETSC_SUCCESS); 5230 } 5231 5232 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5233 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5234 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5235 ai = a->i; 5236 aj = a->j; 5237 bi = b->i; 5238 bj = b->j; 5239 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5240 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5241 aa = aav; 5242 ba = bav; 5243 if (scall == MAT_INITIAL_MATRIX) { 5244 PetscCall(PetscMalloc1(1 + am, &ci)); 5245 ci[0] = 0; 5246 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5247 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5248 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5249 k = 0; 5250 for (i = 0; i < am; i++) { 5251 ncols_o = bi[i + 1] - bi[i]; 5252 ncols_d = ai[i + 1] - ai[i]; 5253 /* off-diagonal portion of A */ 5254 for (jo = 0; jo < ncols_o; jo++) { 5255 col = cmap[*bj]; 5256 if (col >= cstart) break; 5257 cj[k] = col; 5258 bj++; 5259 ca[k++] = *ba++; 5260 } 5261 /* diagonal portion of A */ 5262 for (j = 0; j < ncols_d; j++) { 5263 cj[k] = cstart + *aj++; 5264 ca[k++] = *aa++; 5265 } 5266 /* off-diagonal portion of A */ 5267 for (j = jo; j < ncols_o; j++) { 5268 cj[k] = cmap[*bj++]; 5269 ca[k++] = *ba++; 5270 } 5271 } 5272 /* put together the new matrix */ 5273 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5274 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5275 /* Since these are PETSc arrays, change flags to free them as necessary. 
    */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

   Output Parameters:
+  glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
-  A_loc - the local sequential matrix generated

   Level: developer

   Note:
   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
   part, then those associated with the off-diagonal part (in its local ordering)

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
{
  Mat             Ao, Ad;
  const PetscInt *cmap;
  PetscMPIInt     size;
  PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)Ad));
      *A_loc = Ad;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
    }
    if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  if (f) {
    PetscCall((*f)(A, scall, glob, A_loc));
  } else {
    Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
    Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
    Mat_SeqAIJ        *c;
    PetscInt          *ai = a->i, *aj = a->j;
    PetscInt          *bi = b->i, *bj = b->j;
    PetscInt          *ci, *cj;
    const PetscScalar *aa, *ba;
    PetscScalar       *ca;
    PetscInt           i, j, am, dn, on;

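    /* Fallback path: merge Ad and Ao row by row; the diagonal-block columns keep their
       local indices [0, dn) and the off-diagonal columns are shifted by dn to occupy
       [dn, dn + on), which is exactly the column ordering documented above. */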
    PetscCall(MatGetLocalSize(Ad, &am, &dn));
    PetscCall(MatGetLocalSize(Ao, NULL, &on));
    PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
    PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscInt k;
      PetscCall(PetscMalloc1(1 + am, &ci));
      PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
      PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
      ci[0] = 0;
      for (i = 0, k = 0; i < am; i++) {
        const PetscInt ncols_o = bi[i + 1] - bi[i];
        const PetscInt ncols_d = ai[i + 1] - ai[i];
        ci[i + 1] = ci[i] + ncols_o + ncols_d;
        /* diagonal portion of A */
        for (j = 0; j < ncols_d; j++, k++) {
          cj[k] = *aj++;
          ca[k] = *aa++;
        }
        /* off-diagonal portion of A */
        for (j = 0; j < ncols_o; j++, k++) {
          cj[k] = dn + *bj++;
          ca[k] = *ba++;
        }
      }
      /* put together the new matrix */
      PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
      /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
      /* Since these are PETSc arrays, change flags to free them as necessary. */
      c          = (Mat_SeqAIJ *)(*A_loc)->data;
      c->free_a  = PETSC_TRUE;
      c->free_ij = PETSC_TRUE;
      c->nonew   = 0;
      PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
      for (i = 0; i < am; i++) {
        const PetscInt ncols_d = ai[i + 1] - ai[i];
        const PetscInt ncols_o = bi[i + 1] - bi[i];
        /* diagonal portion of A */
        for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
        /* off-diagonal portion of A */
        for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
      }
      PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
    PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
    PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
    if (glob) {
      PetscInt cst, *gidx;

      PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
      PetscCall(PetscMalloc1(dn + on, &gidx));
      for (i = 0; i < dn; i++) gidx[i] = cst + i;
      for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
    }
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
.  row - index set of rows to extract (or `NULL`)
-  col - index set of columns to extract (or `NULL`)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices: all the columns of a row are extracted once the row is matched.
 * Rows can be local or remote. The routine is designed to be scalable in memory, so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ, so ilocal needs to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth serves as leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use a memory-scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking the rows of B that match the nonzero columns of local A.
 * This supports MPIAIJ and MAIJ.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but the user wants to recreate it */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update the values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that are associated with the nonzero columns of local `A`

   Collective

   Input Parameters:
+  A - the first matrix in `MATMPIAIJ` format
.  B - the second matrix in `MATMPIAIJ` format
-  scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

   Output Parameters:
+  rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.  colb - On input index sets of columns of B to extract (or `NULL`), modified on output
-  B_seq - the sequential matrix generated

   Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
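  /* Hand the index sets back to the caller when they were requested (so they can be passed
     back in with MAT_REUSE_MATRIX); otherwise destroy the temporary sets created above. */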
5809 if (!rowb) { 5810 PetscCall(ISDestroy(&isrowb)); 5811 } else { 5812 *rowb = isrowb; 5813 } 5814 if (!colb) { 5815 PetscCall(ISDestroy(&iscolb)); 5816 } else { 5817 *colb = iscolb; 5818 } 5819 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5820 PetscFunctionReturn(PETSC_SUCCESS); 5821 } 5822 5823 /* 5824 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns 5825 of the OFF-DIAGONAL portion of local A 5826 5827 Collective 5828 5829 Input Parameters: 5830 + A,B - the matrices in `MATMPIAIJ` format 5831 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5832 5833 Output Parameters: 5834 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5835 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5836 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5837 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5838 5839 Developer Note: 5840 This directly accesses information inside the VecScatter associated with the matrix-vector product 5841 for this matrix. This is not desirable. 5842 5843 Level: developer 5844 5845 */ 5846 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5847 { 5848 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5849 Mat_SeqAIJ *b_oth; 5850 VecScatter ctx; 5851 MPI_Comm comm; 5852 const PetscMPIInt *rprocs, *sprocs; 5853 const PetscInt *srow, *rstarts, *sstarts; 5854 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5855 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5856 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5857 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5858 PetscMPIInt size, tag, rank, nreqs; 5859 5860 PetscFunctionBegin; 5861 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5862 PetscCallMPI(MPI_Comm_size(comm, &size)); 5863 5864 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5865 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5866 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5867 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5868 5869 if (size == 1) { 5870 startsj_s = NULL; 5871 bufa_ptr = NULL; 5872 *B_oth = NULL; 5873 PetscFunctionReturn(PETSC_SUCCESS); 5874 } 5875 5876 ctx = a->Mvctx; 5877 tag = ((PetscObject)ctx)->tag; 5878 5879 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5880 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5881 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5882 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5883 PetscCall(PetscMalloc1(nreqs, &reqs)); 5884 rwaits = reqs; 5885 swaits = reqs + nrecvs; 5886 5887 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5888 if (scall == MAT_INITIAL_MATRIX) { 5889 /* i-array */ 5890 /* post receives */ 5891 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]),
&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5892 for (i = 0; i < nrecvs; i++) { 5893 rowlen = rvalues + rstarts[i] * rbs; 5894 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5895 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5896 } 5897 5898 /* pack the outgoing message */ 5899 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5900 5901 sstartsj[0] = 0; 5902 rstartsj[0] = 0; 5903 len = 0; /* total length of j or a array to be sent */ 5904 if (nsends) { 5905 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5906 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5907 } 5908 for (i = 0; i < nsends; i++) { 5909 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5910 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5911 for (j = 0; j < nrows; j++) { 5912 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5913 for (l = 0; l < sbs; l++) { 5914 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5915 5916 rowlen[j * sbs + l] = ncols; 5917 5918 len += ncols; 5919 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5920 } 5921 k++; 5922 } 5923 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5924 5925 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5926 } 5927 /* recvs and sends of i-array are completed */ 5928 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5929 PetscCall(PetscFree(svalues)); 5930 5931 /* allocate buffers for sending j and a arrays */ 5932 PetscCall(PetscMalloc1(len + 1, &bufj)); 5933 PetscCall(PetscMalloc1(len + 1, &bufa)); 5934 5935 /* create i-array of B_oth */ 5936 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5937 5938 b_othi[0] = 0; 5939 len = 0; /* total length of j or a array to be received */ 5940 k = 0; 5941 for (i = 0; i < nrecvs; i++) { 5942 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5943 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5944 for (j = 0; j < nrows; j++) { 5945 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5946 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5947 k++; 5948 } 5949 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5950 } 5951 PetscCall(PetscFree(rvalues)); 5952 5953 /* allocate space for j and a arrays of B_oth */ 5954 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5955 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5956 5957 /* j-array */ 5958 /* post receives of j-array */ 5959 for (i = 0; i < nrecvs; i++) { 5960 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5961 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5962 } 5963 5964 /* pack the outgoing message j-array */ 5965 if (nsends) k = sstarts[0]; 5966 for (i = 0; i < nsends; i++) { 5967 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5968 bufJ = bufj + sstartsj[i]; 5969 for (j = 0; j < nrows; j++) { 5970 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5971 for (ll = 0; ll < sbs; ll++) { 5972 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5973 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5974 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5975 } 5976 } 5977 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, 
swaits + i)); 5978 } 5979 5980 /* recvs and sends of j-array are completed */ 5981 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5982 } else if (scall == MAT_REUSE_MATRIX) { 5983 sstartsj = *startsj_s; 5984 rstartsj = *startsj_r; 5985 bufa = *bufa_ptr; 5986 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5987 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5988 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5989 5990 /* a-array */ 5991 /* post receives of a-array */ 5992 for (i = 0; i < nrecvs; i++) { 5993 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5994 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5995 } 5996 5997 /* pack the outgoing message a-array */ 5998 if (nsends) k = sstarts[0]; 5999 for (i = 0; i < nsends; i++) { 6000 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6001 bufA = bufa + sstartsj[i]; 6002 for (j = 0; j < nrows; j++) { 6003 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6004 for (ll = 0; ll < sbs; ll++) { 6005 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6006 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6007 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6008 } 6009 } 6010 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6011 } 6012 /* recvs and sends of a-array are completed */ 6013 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6014 PetscCall(PetscFree(reqs)); 6015 6016 if (scall == MAT_INITIAL_MATRIX) { 6017 /* put together the new matrix */ 6018 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6019 6020 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6021 /* Since these are PETSc arrays, change flags to free them as necessary. 
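Setting free_a/free_ij below marks b_othi, b_othj, and b_otha to be freed when the matrix is destroyed.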
*/ 6022 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6023 b_oth->free_a = PETSC_TRUE; 6024 b_oth->free_ij = PETSC_TRUE; 6025 b_oth->nonew = 0; 6026 6027 PetscCall(PetscFree(bufj)); 6028 if (!startsj_s || !bufa_ptr) { 6029 PetscCall(PetscFree2(sstartsj, rstartsj)); 6030 PetscCall(PetscFree(bufa_ptr)); 6031 } else { 6032 *startsj_s = sstartsj; 6033 *startsj_r = rstartsj; 6034 *bufa_ptr = bufa; 6035 } 6036 } else if (scall == MAT_REUSE_MATRIX) { 6037 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6038 } 6039 6040 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6041 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6042 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6043 PetscFunctionReturn(PETSC_SUCCESS); 6044 } 6045 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6049 #if defined(PETSC_HAVE_MKL_SPARSE) 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6051 #endif 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6054 #if defined(PETSC_HAVE_ELEMENTAL) 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6056 #endif 6057 #if defined(PETSC_HAVE_SCALAPACK) 6058 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6059 #endif 6060 #if defined(PETSC_HAVE_HYPRE) 6061 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 #if defined(PETSC_HAVE_CUDA) 6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6065 #endif 6066 #if defined(PETSC_HAVE_HIP) 6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6068 #endif 6069 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6071 #endif 6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6073 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6074 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6075 6076 /* 6077 Computes (B'*A')' since computing B*A directly is untenable 6078 6079 n p p 6080 [ ] [ ] [ ] 6081 m [ A ] * n [ B ] = m [ C ] 6082 [ ] [ ] [ ] 6083 6084 */ 6085 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6086 { 6087 Mat At, Bt, Ct; 6088 6089 PetscFunctionBegin; 6090 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6091 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6092 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6093 PetscCall(MatDestroy(&At)); 6094 PetscCall(MatDestroy(&Bt)); 6095 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6096 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6097 PetscCall(MatDestroy(&Ct)); 6098 PetscFunctionReturn(PETSC_SUCCESS); 6099 } 6100 6101 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6102 { 6103 PetscBool cisdense; 6104 6105 PetscFunctionBegin; 6106 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6107 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6108 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6109 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6110 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6111 PetscCall(MatSetUp(C)); 6112 6113 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6114 PetscFunctionReturn(PETSC_SUCCESS); 6115 } 6116 6117 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6118 { 6119 Mat_Product *product = C->product; 6120 Mat A = product->A, B = product->B; 6121 6122 PetscFunctionBegin; 6123 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6124 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6125 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6126 C->ops->productsymbolic = MatProductSymbolic_AB; 6127 PetscFunctionReturn(PETSC_SUCCESS); 6128 } 6129 6130 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6131 { 6132 Mat_Product *product = C->product; 6133 6134 PetscFunctionBegin; 6135 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6136 PetscFunctionReturn(PETSC_SUCCESS); 6137 } 6138 6139 /* 6140 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6141 6142 Input Parameters: 6143 6144 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6145 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6146 6147 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6148 6149 For Set1, j1[] contains column indices of the nonzeros. 6150 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6151 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6152 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6153 6154 Similarly for Set2. 6155 6156 This routine merges the two sets of nonzeros row by row and removes repeats. 6157 6158 Output Parameters: (memory is allocated by the caller) 6159 6160 i[],j[]: the CSR of the merged matrix, which has m rows. 6161 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6162 imap2[]: similar to imap1[], but for Set2. 6163 Note we order nonzeros row-by-row and from left to right.
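For illustration, a hypothetical single-row example: Set1 has j1 = [0,0,3,7] (column 0 repeated) with jmap1 = [0,2,3,4]; Set2 has j2 = [3,5] with jmap2 = [0,1,2]. The merged row is then j = [0,3,5,7], so imap1 = [0,1,3] and imap2 = [1,2].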
6164 */ 6165 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6166 { 6167 PetscInt r, m; /* Row index of mat */ 6168 PetscCount t, t1, t2, b1, e1, b2, e2; 6169 6170 PetscFunctionBegin; 6171 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6172 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2, and the merged matrix, respectively */ 6173 i[0] = 0; 6174 for (r = 0; r < m; r++) { /* Do row by row merging */ 6175 b1 = rowBegin1[r]; 6176 e1 = rowEnd1[r]; 6177 b2 = rowBegin2[r]; 6178 e2 = rowEnd2[r]; 6179 while (b1 < e1 && b2 < e2) { 6180 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6181 j[t] = j1[b1]; 6182 imap1[t1] = t; 6183 imap2[t2] = t; 6184 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6185 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6186 t1++; 6187 t2++; 6188 t++; 6189 } else if (j1[b1] < j2[b2]) { 6190 j[t] = j1[b1]; 6191 imap1[t1] = t; 6192 b1 += jmap1[t1 + 1] - jmap1[t1]; 6193 t1++; 6194 t++; 6195 } else { 6196 j[t] = j2[b2]; 6197 imap2[t2] = t; 6198 b2 += jmap2[t2 + 1] - jmap2[t2]; 6199 t2++; 6200 t++; 6201 } 6202 } 6203 /* Merge the remaining in either j1[] or j2[] */ 6204 while (b1 < e1) { 6205 j[t] = j1[b1]; 6206 imap1[t1] = t; 6207 b1 += jmap1[t1 + 1] - jmap1[t1]; 6208 t1++; 6209 t++; 6210 } 6211 while (b2 < e2) { 6212 j[t] = j2[b2]; 6213 imap2[t2] = t; 6214 b2 += jmap2[t2 + 1] - jmap2[t2]; 6215 t2++; 6216 t++; 6217 } 6218 i[r + 1] = t; 6219 } 6220 PetscFunctionReturn(PETSC_SUCCESS); 6221 } 6222 6223 /* 6224 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6225 6226 Input Parameters: 6227 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6228 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6229 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6230 6231 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6232 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6233 6234 Output Parameters: 6235 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6236 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6237 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6238 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6239 6240 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6241 Atot: number of entries belonging to the diagonal block. 6242 Annz: number of unique nonzeros belonging to the diagonal block. 6243 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6244 repeats (i.e., same 'i,j' pair). 6245 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6246 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6247 6248 Atot: number of entries belonging to the diagonal block 6249 Annz: number of unique nonzeros belonging to the diagonal block. 6250 6251 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6252 6253 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6254 */ 6255 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6256 { 6257 PetscInt cstart, cend, rstart, rend, row, col; 6258 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6259 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6260 PetscCount k, m, p, q, r, s, mid; 6261 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6262 6263 PetscFunctionBegin; 6264 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6265 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6266 m = rend - rstart; 6267 6268 for (k = 0; k < n; k++) { 6269 if (i[k] >= 0) break; 6270 } /* Skip negative rows */ 6271 6272 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6273 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6274 */ 6275 while (k < n) { 6276 row = i[k]; 6277 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6278 for (s = k; s < n; s++) 6279 if (i[s] != row) break; 6280 for (p = k; p < s; p++) { 6281 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6282 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6283 } 6284 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6285 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6286 rowBegin[row - rstart] = k; 6287 rowMid[row - rstart] = mid; 6288 rowEnd[row - rstart] = s; 6289 6290 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6291 Atot += mid - k; 6292 Btot += s - mid; 6293 6294 /* Count unique nonzeros of this diag/offdiag row */ 6295 for (p = k; p < mid;) { 6296 col = j[p]; 6297 do { 6298 j[p] += PETSC_MAX_INT; 6299 p++; 6300 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6301 Annz++; 6302 } 6303 6304 for (p = mid; p < s;) { 6305 col = j[p]; 6306 do { 6307 p++; 6308 } while (p < s && j[p] == col); 6309 Bnnz++; 6310 } 6311 k = s; 6312 } 6313 6314 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6315 PetscCall(PetscMalloc1(Atot, &Aperm)); 6316 PetscCall(PetscMalloc1(Btot, &Bperm)); 6317 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6318 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6319 6320 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6321 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6322 for (r = 0; r < m; r++) { 6323 k = rowBegin[r]; 6324 mid = rowMid[r]; 6325 s = rowEnd[r]; 6326 
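/* Copy this row's permutation indices: entries in [k,mid) belong to the diagonal block (Aperm), entries in [mid,s) to the off-diagonal block (Bperm) */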
PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6327 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6328 Atot += mid - k; 6329 Btot += s - mid; 6330 6331 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6332 for (p = k; p < mid;) { 6333 col = j[p]; 6334 q = p; 6335 do { 6336 p++; 6337 } while (p < mid && j[p] == col); 6338 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6339 Annz++; 6340 } 6341 6342 for (p = mid; p < s;) { 6343 col = j[p]; 6344 q = p; 6345 do { 6346 p++; 6347 } while (p < s && j[p] == col); 6348 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6349 Bnnz++; 6350 } 6351 } 6352 /* Output */ 6353 *Aperm_ = Aperm; 6354 *Annz_ = Annz; 6355 *Atot_ = Atot; 6356 *Ajmap_ = Ajmap; 6357 *Bperm_ = Bperm; 6358 *Bnnz_ = Bnnz; 6359 *Btot_ = Btot; 6360 *Bjmap_ = Bjmap; 6361 PetscFunctionReturn(PETSC_SUCCESS); 6362 } 6363 6364 /* 6365 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6366 6367 Input Parameters: 6368 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6369 nnz: number of unique nonzeros in the merged matrix 6370 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6371 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6372 6373 Output Parameter: (memory is allocated by the caller) 6374 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6375 6376 Example: 6377 nnz1 = 4 6378 nnz = 6 6379 imap = [1,3,4,5] 6380 jmap = [0,3,5,6,7] 6381 then, 6382 jmap_new = [0,0,3,3,5,6,7] 6383 */ 6384 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6385 { 6386 PetscCount k, p; 6387 6388 PetscFunctionBegin; 6389 jmap_new[0] = 0; 6390 p = nnz; /* p loops over jmap_new[] backwards */ 6391 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6392 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6393 } 6394 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6395 PetscFunctionReturn(PETSC_SUCCESS); 6396 } 6397 6398 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6399 { 6400 MPI_Comm comm; 6401 PetscMPIInt rank, size; 6402 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6403 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6404 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6405 6406 PetscFunctionBegin; 6407 PetscCall(PetscFree(mpiaij->garray)); 6408 PetscCall(VecDestroy(&mpiaij->lvec)); 6409 #if defined(PETSC_USE_CTABLE) 6410 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6411 #else 6412 PetscCall(PetscFree(mpiaij->colmap)); 6413 #endif 6414 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6415 mat->assembled = PETSC_FALSE; 6416 mat->was_assembled = PETSC_FALSE; 6417 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6418 6419 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6420 PetscCallMPI(MPI_Comm_size(comm, &size)); 6421 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6422 PetscCall(PetscLayoutSetUp(mat->rmap)); 6423 PetscCall(PetscLayoutSetUp(mat->cmap)); 6424 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6425 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6426 PetscCall(MatGetLocalSize(mat, &m, &n)); 6427 PetscCall(MatGetSize(mat, &M, &N)); 6428 6429 /* Sort (i,j) by row along with a permutation array, so that the
to-be-ignored */ 6430 /* entries come first, then local rows, then remote rows. */ 6431 PetscCount n1 = coo_n, *perm1; 6432 PetscInt *i1 = coo_i, *j1 = coo_j; 6433 6434 PetscCall(PetscMalloc1(n1, &perm1)); 6435 for (k = 0; k < n1; k++) perm1[k] = k; 6436 6437 /* Manipulate indices so that entries with negative row or col indices will have smallest 6438 row indices, local entries will have greater but negative row indices, and remote entries 6439 will have positive row indices. 6440 */ 6441 for (k = 0; k < n1; k++) { 6442 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6443 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6444 else { 6445 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6446 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6447 } 6448 } 6449 6450 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6451 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6452 for (k = 0; k < n1; k++) { 6453 if (i1[k] > PETSC_MIN_INT) break; 6454 } /* Advance k to the first entry we need to take care of */ 6455 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6456 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6457 6458 /* Split local rows into diag/offdiag portions */ 6459 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6460 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6461 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6462 6463 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6464 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6465 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6466 6467 /* Send remote rows to their owner */ 6468 /* Find which rows should be sent to which remote ranks*/ 6469 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6470 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6471 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6472 const PetscInt *ranges; 6473 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6474 6475 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6476 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6477 for (k = rem; k < n1;) { 6478 PetscMPIInt owner; 6479 PetscInt firstRow, lastRow; 6480 6481 /* Locate a row range */ 6482 firstRow = i1[k]; /* first row of this owner */ 6483 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6484 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6485 6486 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6487 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6488 6489 /* All entries in [k,p) belong to this remote owner */ 6490 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6491 PetscMPIInt *sendto2; 6492 PetscInt *nentries2; 6493 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6494 6495 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6496 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6497 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6498 PetscCall(PetscFree2(sendto, nentries)); 6499 sendto = sendto2; 6500 nentries = nentries2; 6501 maxNsend = maxNsend2; 6502 } 6503 sendto[nsend] = owner; 6504 nentries[nsend] = p - k; 6505 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6506 nsend++; 6507 k = p; 6508 } 6509 6510 /* Build 1st SF to know offsets on remote to send data */ 6511 PetscSF sf1; 6512 PetscInt nroots = 1, nroots2 = 0; 6513 PetscInt nleaves = nsend, nleaves2 = 0; 6514 PetscInt *offsets; 6515 PetscSFNode *iremote; 6516 6517 PetscCall(PetscSFCreate(comm, &sf1)); 6518 PetscCall(PetscMalloc1(nsend, &iremote)); 6519 PetscCall(PetscMalloc1(nsend, &offsets)); 6520 for (k = 0; k < nsend; k++) { 6521 iremote[k].rank = sendto[k]; 6522 iremote[k].index = 0; 6523 nleaves2 += nentries[k]; 6524 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6525 } 6526 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6527 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6528 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6529 PetscCall(PetscSFDestroy(&sf1)); 6530 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6531 6532 /* Build 2nd SF to send remote COOs to their owner */ 6533 PetscSF sf2; 6534 nroots = nroots2; 6535 nleaves = nleaves2; 6536 PetscCall(PetscSFCreate(comm, &sf2)); 6537 PetscCall(PetscSFSetFromOptions(sf2)); 6538 PetscCall(PetscMalloc1(nleaves, &iremote)); 6539 p = 0; 6540 for (k = 0; k < nsend; k++) { 6541 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6542 for (q = 0; q < nentries[k]; q++, p++) { 6543 iremote[p].rank = sendto[k]; 6544 iremote[p].index = offsets[k] + q; 6545 } 6546 } 6547 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6548 6549 /* sf2 only sends contiguous leafdata to contiguous rootdata.
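Leaves destined for one owner land in consecutive root slots starting at the offsets[] value computed above for that owner.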
We record the permutation which will be used to fill leafdata */ 6550 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6551 6552 /* Send the remote COOs to their owner */ 6553 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6554 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6555 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6556 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6558 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6559 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6560 6561 PetscCall(PetscFree(offsets)); 6562 PetscCall(PetscFree2(sendto, nentries)); 6563 6564 /* Sort received COOs by row along with the permutation array */ 6565 for (k = 0; k < n2; k++) perm2[k] = k; 6566 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6567 6568 /* Split received COOs into diag/offdiag portions */ 6569 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6570 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6571 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6572 6573 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6574 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6575 6576 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6577 PetscInt *Ai, *Bi; 6578 PetscInt *Aj, *Bj; 6579 6580 PetscCall(PetscMalloc1(m + 1, &Ai)); 6581 PetscCall(PetscMalloc1(m + 1, &Bi)); 6582 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6583 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6584 6585 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6586 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6587 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6588 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6589 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6590 6591 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6592 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6593 6594 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6595 /* expect nonzeros in A/B most likely have local contributing entries */ 6596 PetscInt Annz = Ai[m]; 6597 PetscInt Bnnz = Bi[m]; 6598 PetscCount *Ajmap1_new, *Bjmap1_new; 6599 6600 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6601 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6602 6603 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6604 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6605 6606 PetscCall(PetscFree(Aimap1)); 6607 PetscCall(PetscFree(Ajmap1)); 6608 PetscCall(PetscFree(Bimap1)); 6609 PetscCall(PetscFree(Bjmap1)); 6610 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6611 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6612 PetscCall(PetscFree(perm1)); 6613 PetscCall(PetscFree3(i2, j2, perm2)); 6614 6615 Ajmap1 = Ajmap1_new; 6616 Bjmap1 = Bjmap1_new; 6617 6618 /* Reallocate Aj, Bj once we know actual numbers of 
unique nonzeros in A and B */ 6619 if (Annz < Annz1 + Annz2) { 6620 PetscInt *Aj_new; 6621 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6622 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6623 PetscCall(PetscFree(Aj)); 6624 Aj = Aj_new; 6625 } 6626 6627 if (Bnnz < Bnnz1 + Bnnz2) { 6628 PetscInt *Bj_new; 6629 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6630 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6631 PetscCall(PetscFree(Bj)); 6632 Bj = Bj_new; 6633 } 6634 6635 /* Create new submatrices for on-process and off-process coupling */ 6636 PetscScalar *Aa, *Ba; 6637 MatType rtype; 6638 Mat_SeqAIJ *a, *b; 6639 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6640 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6641 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6642 if (cstart) { 6643 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6644 } 6645 PetscCall(MatDestroy(&mpiaij->A)); 6646 PetscCall(MatDestroy(&mpiaij->B)); 6647 PetscCall(MatGetRootType_Private(mat, &rtype)); 6648 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6649 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6650 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6651 6652 a = (Mat_SeqAIJ *)mpiaij->A->data; 6653 b = (Mat_SeqAIJ *)mpiaij->B->data; 6654 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6655 a->free_a = b->free_a = PETSC_TRUE; 6656 a->free_ij = b->free_ij = PETSC_TRUE; 6657 6658 /* conversion must happen AFTER multiply setup */ 6659 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6660 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6661 PetscCall(VecDestroy(&mpiaij->lvec)); 6662 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6663 6664 mpiaij->coo_n = coo_n; 6665 mpiaij->coo_sf = sf2; 6666 mpiaij->sendlen = nleaves; 6667 mpiaij->recvlen = nroots; 6668 6669 mpiaij->Annz = Annz; 6670 mpiaij->Bnnz = Bnnz; 6671 6672 mpiaij->Annz2 = Annz2; 6673 mpiaij->Bnnz2 = Bnnz2; 6674 6675 mpiaij->Atot1 = Atot1; 6676 mpiaij->Atot2 = Atot2; 6677 mpiaij->Btot1 = Btot1; 6678 mpiaij->Btot2 = Btot2; 6679 6680 mpiaij->Ajmap1 = Ajmap1; 6681 mpiaij->Aperm1 = Aperm1; 6682 6683 mpiaij->Bjmap1 = Bjmap1; 6684 mpiaij->Bperm1 = Bperm1; 6685 6686 mpiaij->Aimap2 = Aimap2; 6687 mpiaij->Ajmap2 = Ajmap2; 6688 mpiaij->Aperm2 = Aperm2; 6689 6690 mpiaij->Bimap2 = Bimap2; 6691 mpiaij->Bjmap2 = Bjmap2; 6692 mpiaij->Bperm2 = Bperm2; 6693 6694 mpiaij->Cperm1 = Cperm1; 6695 6696 /* Allocate in preallocation. 
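These are the send/receive buffers (sendbuf/recvbuf) later used by MatSetValuesCOO() to ship off-process values to their owners.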
If not used, it has zero cost on host */ 6697 PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf)); 6698 PetscFunctionReturn(PETSC_SUCCESS); 6699 } 6700 6701 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6702 { 6703 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6704 Mat A = mpiaij->A, B = mpiaij->B; 6705 PetscCount Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2; 6706 PetscScalar *Aa, *Ba; 6707 PetscScalar *sendbuf = mpiaij->sendbuf; 6708 PetscScalar *recvbuf = mpiaij->recvbuf; 6709 const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2; 6710 const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2; 6711 const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2; 6712 const PetscCount *Cperm1 = mpiaij->Cperm1; 6713 6714 PetscFunctionBegin; 6715 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6716 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6717 6718 /* Pack entries to be sent to remote */ 6719 for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6720 6721 /* Send remote entries to their owner and overlap the communication with local computation */ 6722 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6723 /* Add local entries to A and B */ 6724 for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6725 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6726 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6727 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6728 } 6729 for (PetscCount i = 0; i < Bnnz; i++) { 6730 PetscScalar sum = 0.0; 6731 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6732 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6733 } 6734 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6735 6736 /* Add received remote entries to A and B */ 6737 for (PetscCount i = 0; i < Annz2; i++) { 6738 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6739 } 6740 for (PetscCount i = 0; i < Bnnz2; i++) { 6741 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6742 } 6743 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6744 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6745 PetscFunctionReturn(PETSC_SUCCESS); 6746 } 6747 6748 /*MC 6749 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6750 6751 Options Database Keys: 6752 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6753 6754 Level: beginner 6755 6756 Notes: 6757 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6758 in this case the values associated with the rows and columns one passes in are set to zero 6759 in the matrix 6760 6761 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6762 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6763 6764 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6765 M*/ 6766 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6767 { 6768 Mat_MPIAIJ *b; 6769 PetscMPIInt size; 6770 6771 PetscFunctionBegin; 6772 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6773 6774 PetscCall(PetscNew(&b)); 6775 B->data = (void *)b; 6776 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6777 B->assembled = PETSC_FALSE; 6778 B->insertmode = NOT_SET_VALUES; 6779 b->size = size; 6780 6781 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6782 6783 /* build cache for off array entries formed */ 6784 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6785 6786 b->donotstash = PETSC_FALSE; 6787 b->colmap = NULL; 6788 b->garray = NULL; 6789 b->roworiented = PETSC_TRUE; 6790 6791 /* stuff used for matrix vector multiply */ 6792 b->lvec = NULL; 6793 b->Mvctx = NULL; 6794 6795 /* stuff for MatGetRow() */ 6796 b->rowindices = NULL; 6797 b->rowvalues = NULL; 6798 b->getrowactive = PETSC_FALSE; 6799 6800 /* flexible pointer used in CUSPARSE classes */ 6801 b->spptr = NULL; 6802 6803 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6804 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6805 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6806 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6807 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6808 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6809 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6810 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6811 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6812 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6813 #if defined(PETSC_HAVE_CUDA) 6814 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6815 #endif 6816 #if defined(PETSC_HAVE_HIP) 6817 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6818 #endif 6819 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6820 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6821 #endif 6822 #if defined(PETSC_HAVE_MKL_SPARSE) 6823 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6824 #endif 6825 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6826 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6827 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6829 #if defined(PETSC_HAVE_ELEMENTAL) 6830 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6831 #endif 6832 #if defined(PETSC_HAVE_SCALAPACK) 6833 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6834 #endif 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6837 #if defined(PETSC_HAVE_HYPRE) 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6840 #endif 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6845 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6846 PetscFunctionReturn(PETSC_SUCCESS); 6847 } 6848 6849 /*@C 6850 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6851 and "off-diagonal" part of the matrix in CSR format. 6852 6853 Collective 6854 6855 Input Parameters: 6856 + comm - MPI communicator 6857 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6858 . n - This value should be the same as the local size used in creating the 6859 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6860 calculated if `N` is given) For square matrices `n` is almost always `m`. 6861 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6862 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6863 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6864 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6865 . a - matrix values 6866 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6867 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6868 - oa - matrix values 6869 6870 Output Parameter: 6871 . mat - the matrix 6872 6873 Level: advanced 6874 6875 Notes: 6876 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6877 must free the arrays once the matrix has been destroyed and not before. 6878 6879 The `i` and `j` indices are 0 based 6880 6881 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6882 6883 This sets local rows and cannot be used to set off-processor values. 
6884 6885 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6886 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6887 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6888 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6889 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6890 communication if it is known that only local entries will be set. 6891 6892 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6893 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6894 @*/ 6895 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6896 { 6897 Mat_MPIAIJ *maij; 6898 6899 PetscFunctionBegin; 6900 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6901 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6902 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6903 PetscCall(MatCreate(comm, mat)); 6904 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6905 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6906 maij = (Mat_MPIAIJ *)(*mat)->data; 6907 6908 (*mat)->preallocated = PETSC_TRUE; 6909 6910 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6911 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6912 6913 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6914 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6915 6916 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6917 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6918 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6919 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6920 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6921 PetscFunctionReturn(PETSC_SUCCESS); 6922 } 6923 6924 typedef struct { 6925 Mat *mp; /* intermediate products */ 6926 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6927 PetscInt cp; /* number of intermediate products */ 6928 6929 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6930 PetscInt *startsj_s, *startsj_r; 6931 PetscScalar *bufa; 6932 Mat P_oth; 6933 6934 /* may take advantage of merging product->B */ 6935 Mat Bloc; /* B-local by merging diag and off-diag */ 6936 6937 /* cusparse does not have support to split between symbolic and numeric phases. 6938 When api_user is true, we don't need to update the numerical values 6939 of the temporary storage */ 6940 PetscBool reusesym; 6941 6942 /* support for COO values insertion */ 6943 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6944 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6945 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6946 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6947 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6948 PetscMemType mtype; 6949 6950 /* customization */ 6951 PetscBool abmerge; 6952 PetscBool P_oth_bind; 6953 } MatMatMPIAIJBACKEND; 6954 6955 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6956 { 6957 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6958 PetscInt i; 6959 6960 PetscFunctionBegin; 6961 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6962 PetscCall(PetscFree(mmdata->bufa)); 6963 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6964 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6965 PetscCall(MatDestroy(&mmdata->P_oth)); 6966 PetscCall(MatDestroy(&mmdata->Bloc)); 6967 PetscCall(PetscSFDestroy(&mmdata->sf)); 6968 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6969 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6970 PetscCall(PetscFree(mmdata->own[0])); 6971 PetscCall(PetscFree(mmdata->own)); 6972 PetscCall(PetscFree(mmdata->off[0])); 6973 PetscCall(PetscFree(mmdata->off)); 6974 PetscCall(PetscFree(mmdata)); 6975 PetscFunctionReturn(PETSC_SUCCESS); 6976 } 6977 6978 /* Copy selected n entries with indices in idx[] of A to v[]. 6979 If idx is NULL, copy the whole data array of A to v[] 6980 */ 6981 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6982 { 6983 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 6984 6985 PetscFunctionBegin; 6986 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 6987 if (f) { 6988 PetscCall((*f)(A, n, idx, v)); 6989 } else { 6990 const PetscScalar *vv; 6991 6992 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 6993 if (n && idx) { 6994 PetscScalar *w = v; 6995 const PetscInt *oi = idx; 6996 PetscInt j; 6997 6998 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6999 } else { 7000 PetscCall(PetscArraycpy(v, vv, n)); 7001 } 7002 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7003 } 7004 PetscFunctionReturn(PETSC_SUCCESS); 7005 } 7006 7007 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7008 { 7009 MatMatMPIAIJBACKEND *mmdata; 7010 PetscInt i, n_d, n_o; 7011 7012 PetscFunctionBegin; 7013 MatCheckProduct(C, 1); 7014 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7015 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7016 if (!mmdata->reusesym) { /* update temporary matrices */ 7017 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7018 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7019 } 7020 mmdata->reusesym = PETSC_FALSE; 7021 7022 for (i = 0; i < mmdata->cp; i++) { 7023 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7024 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7025 } 7026 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7027 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7028 7029 if (mmdata->mptmp[i]) continue; 7030 if (noff) { 7031 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7032 7033 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7034 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7035 n_o += noff; 7036 n_d += nown; 7037 } else { 7038 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7039 7040 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7041 n_d += mm->nz; 7042 } 7043 } 7044 if (mmdata->hasoffproc) { /* offprocess insertion */ 7045 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7046 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7047 } 7048 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7049 PetscFunctionReturn(PETSC_SUCCESS); 7050 } 7051 7052 /* Support for Pt * A, A * P, or Pt * A * P */ 7053 #define MAX_NUMBER_INTERMEDIATE 4 7054 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7055 { 7056 Mat_Product *product = C->product; 7057 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7058 Mat_MPIAIJ *a, *p; 7059 MatMatMPIAIJBACKEND *mmdata; 7060 ISLocalToGlobalMapping P_oth_l2g = NULL; 7061 IS glob = NULL; 7062 const char *prefix; 7063 char pprefix[256]; 7064 const PetscInt *globidx, *P_oth_idx; 7065 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7066 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7067 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7068 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7069 /* a base offset; type-2: sparse with a local to global map table */ 7070 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7071 7072 MatProductType ptype; 7073 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7074 PetscMPIInt size; 7075 7076 PetscFunctionBegin; 7077 MatCheckProduct(C, 1); 7078 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7079 ptype = product->type; 7080 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7081 ptype = MATPRODUCT_AB; 7082 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7083 } 7084 switch (ptype) { 7085 case MATPRODUCT_AB: 7086 A = product->A; 7087 P = product->B; 7088 m = A->rmap->n; 7089 n = P->cmap->n; 7090 M = A->rmap->N; 7091 N = P->cmap->N; 7092 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7093 break; 7094 case MATPRODUCT_AtB: 7095 P = product->A; 7096 A = product->B; 7097 m = P->cmap->n; 7098 n = A->cmap->n; 7099 M = P->cmap->N; 7100 N = A->cmap->N; 7101 hasoffproc = PETSC_TRUE; 7102 break; 7103 case MATPRODUCT_PtAP: 7104 A = product->A; 7105 P = product->B; 7106 m = P->cmap->n; 7107 n = P->cmap->n; 7108 M = P->cmap->N; 7109 N = P->cmap->N; 7110 hasoffproc = PETSC_TRUE; 7111 break; 7112 default: 7113 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7114 } 7115 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7116 if (size == 1) hasoffproc = PETSC_FALSE; 7117 7118 /* defaults */ 7119 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7120 mp[i] = NULL; 7121 mptmp[i] = PETSC_FALSE; 7122 rmapt[i] = -1; 7123 cmapt[i] = -1; 7124 rmapa[i] = NULL; 7125 cmapa[i] = NULL; 7126 } 7127 7128 /* customization */ 7129 PetscCall(PetscNew(&mmdata)); 7130 mmdata->reusesym = product->api_user; 7131 if (ptype == MATPRODUCT_AB) { 7132 if (product->api_user) { 7133 
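/* options named after the user-facing MatMatMult() API */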

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].          */
                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                          /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
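  /* Note: for MATPRODUCT_AB the result is assembled from the pieces computed above,
     C = A * P = A_diag * P_local + A_off * P_oth, with P_local either merged or kept
     as separate diagonal/off-diagonal products depending on -matmatmult_backend_mergeB. */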

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
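  /* Note: for MATPRODUCT_PtAP the result is assembled as P_local^T * A_diag * P_local
     (the direct PtAP product above) plus P_local^T * (A_off * P_oth), where the
     intermediate A_off * P_oth is marked temporary (mptmp) and is consumed by the
     following AtB product. */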
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare COO coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats:
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].

    off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
    so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
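  /*
    Illustrative example with hypothetical sizes: for cp = 2, if mp[0] has 3 nonzeros
    headed off-process and 5 locally owned ones while mp[1] has 0 and 4, then, viewed
    as offsets into the shared index arrays, off[] corresponds to {0, 3, 3} and own[]
    to {0, 5, 9}; off[p+1]-off[p] recovers each matrix's off-process count.
  */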
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a CSR-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
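
/*
   The COO assembly protocol prepared above, in public-API form (a sketch; here C,
   ncoo, coo_i/coo_j and v stand for the quantities computed in this routine):
.vb
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); // symbolic: fix the pattern once
  PetscCall(MatSetValuesCOO(C, v, INSERT_VALUES));          // numeric: can be repeated cheaply
.ve
*/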

PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fall back to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}
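
/*
   Illustrative example: with bs = 2 and a row whose column indices are {0, 1, 4, 5, 7},
   MatCollapseRow() returns cc[] = {0, 2, 3} and *n = 3 (indices 0,1 map to block 0;
   4,5 to block 2; 7 to block 3).
*/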

/*
   Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

   ncollapsed - the number of block indices
   collapsed - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
*/
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a      = d->A;
    b      = d->B;
    garray = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols;
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0;
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart; // diag
          if (c != a) cid = garray[idx[jj]];
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
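
/*
   A minimal usage sketch (MatFilter_AIJ is static to this file; G is assumed to be
   an assembled (MPI)AIJ graph matrix):
.vb
  Mat Gf;
  PetscCall(MatFilter_AIJ(G, 0.01, &Gf)); // keep entries with |value| > 0.01
  PetscCall(MatDestroy(&G));
.ve
   The filtered graph can be inspected with -mat_filter_graph_view.
*/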

/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameters:
+  Amat - matrix
.  symmetrize - make the result symmetric
.  scale - scale with diagonal
-  filter - filter values below this threshold (negative to skip filtering)

   Output Parameter:
.  a_Gmat - output scalar graph >= 0

*/
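/*
   A minimal usage sketch (Amat is assumed to be an assembled (MPI)AIJ matrix):
.vb
  Mat G;
  PetscCall(MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, -1.0, &G));
.ve
   A negative filter skips the thresholding pass (see the filter >= 0 checks below).
*/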
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs;

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;
        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0;
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0;
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
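      /* Each dense bs x bs block is collapsed to a single scalar: the sum of the
         absolute values of its entries (a cheap block norm), inserted at the
         corresponding block row/column of Gmat. */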
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart, Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
         Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
           Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
           Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
           Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a = d->A;
      b = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
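  /* Symmetrize if requested and not already known symmetric: Gmat <- Gmat + Gmat^T */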
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale so that all diagonal entries are 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    Mat Fmat = NULL; /* some silly compiler needs this */

    PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
    PetscCall(MatDestroy(&Gmat));
    Gmat = Fmat;
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
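/*
   Note: this wrapper lets Fortran call the MPIAIJ MatSetValues() kernel directly,
   bypassing the generic dispatch; the PetscCall()/SETERRQ() redefinitions above
   report errors through the *_ierr argument, as required for a void Fortran-callable
   function.
*/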
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definitions above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ