#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
  Destroys an MPIAIJ matrix: frees the diagonal (A) and off-diagonal (B) sequential
  blocks, the global-to-local column map, the gathered off-process communication
  objects (lvec, Mvctx), and then detaches every composed function/object so the
  PetscObject can be safely re-typed or freed.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is either a hash table or a dense array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* Detach all composed methods registered by MatCreate_MPIAIJ and friends */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also detached above; the duplicate is harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
  Builds a merged (sequential) copy of the whole parallel matrix and returns its
  row/column CSR arrays.  The temporary merged matrix is stashed on A under the key
  "MatGetRowIJ_MPIAIJ" so MatRestoreRowIJ_MPIAIJ() can find it again; the local
  reference is dropped here, leaving the composition as the only owner.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Companion to MatGetRowIJ_MPIAIJ(): retrieves the stashed merged matrix, restores
  its IJ arrays, and removes the composition (destroying the merged matrix).
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Level: beginner

   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
  Binds (or unbinds) the matrix and its sub-objects to the CPU.  Delegates to the
  diagonal block A, the off-diagonal block B, and the work vectors used in
  matrix-vector products.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* the flag is only meaningful when a device backend is configured */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Propagates block sizes to the sequential sub-matrices.  The off-diagonal block B
  always gets a column block size of 1 because its columns are a compressed subset
  of the global columns with no block structure.
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns an index set of the locally owned rows that contain at least one
  (numerically) nonzero entry, or leaves *keptrows NULL when no rank found an
  empty row.  Collective: performs an all-reduce on the count of empty rows.
  The first pass only counts; a second pass gathers the kept row indices.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows that are structurally or numerically empty */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero in the diagonal block */
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* row has a nonzero in the off-diagonal block */
    }
    cnt++;
  ok1:;
  }
  /* if every rank has zero empty rows, return NULL to signal "keep everything" */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect global indices of the rows that have a nonzero */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Sets (or adds to) the diagonal of Y from vector D.  When row and column layouts
  are congruent and Y is assembled, the diagonal lives entirely in the local
  diagonal block, so the operation is delegated to it; otherwise the generic
  (communicating) default implementation is used.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns the global indices of locally owned rows whose diagonal entry is zero
  (zero diagonals can only occur in the diagonal block A).
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ
*)M->data;
  PetscInt i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  /* shift the local row indices into the global numbering */
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a per-column reduction (norm, sum, or mean of real/imaginary parts)
  over the whole parallel matrix.  Each rank accumulates its contributions into a
  length-n work array indexed by global column (diagonal-block entries are shifted
  by cmap->rstart, off-diagonal entries are mapped through garray), then the work
  arrays are combined with an MPI all-reduce (MAX for the infinity norm, SUM for
  everything else).  `reductions` must have length equal to the global column count.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore the device arrays so the host copies (a_aij->a, b_aij->a) read below are current */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns the global indices of locally owned rows that have an entry outside the
  block diagonal: the union of the off-block-diagonal rows of A and the nonzero
  rows of B, sorted with duplicates removed and shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* merge the two index lists into one array */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  /* convert local row indices to global numbering */
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
  Entries are stored 1-based so that 0 means "column not present".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Inserts/adds one value at (row,col) into the diagonal block A.  Performs a
  bounded binary-then-linear search in the (sorted) row, reallocating the row via
  MatSeqXAIJReallocateAIJ when a new nonzero must be inserted.  Relies on the
  local variables declared in MatSetValues_MPIAIJ (rp1, ap1, low1, high1, ...).
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
  Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B
  (variables rp2, ap2, low2, high2, ...).  Note: unlike the A variant, zero values
  are dropped even on the diagonal (no `row != col` test) because B holds no
  diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  Overwrites an entire locally owned row with the values v[], which must be
  supplied in global column order: entries left of the diagonal block, then the
  diagonal-block entries, then entries right of the diagonal block.  Only valid
  when the nonzero pattern of the row is unchanged.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* l = number of B entries left of the diagonal block */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues for MPIAIJ: locally owned rows are inserted directly into the
  diagonal (A) or off-diagonal (B) sequential blocks via the macros above;
  off-process rows are queued in the stash for communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,
PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() —
                 disassembly replaced aij->B, so all cached B pointers are stale */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End to communicate */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column indices */
        dnz++;
      } else { /* off-diagonal entries keep their global column index at this stage */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetValues for MPIAIJ: only locally owned rows may be queried.  Columns in
  the local diagonal range are read from A; all other columns are mapped through
  colmap/garray and read from B, returning 0.0 for columns with no stored entry.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* col < 0 or a garray mismatch means the column is not stored on this rank */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Starts communication of the stashed off-process entries.  No-op when stashing
  was disabled or the user promised no off-process entries.
*/
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %"
PetscInt_FMT " mallocs.\n", nstash, reallocs)); 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 758 PetscMPIInt n; 759 PetscInt i, j, rstart, ncols, flg; 760 PetscInt *row, *col; 761 PetscBool other_disassembled; 762 PetscScalar *val; 763 764 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 765 766 PetscFunctionBegin; 767 if (!aij->donotstash && !mat->nooffprocentries) { 768 while (1) { 769 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 770 if (!flg) break; 771 772 for (i = 0; i < n;) { 773 /* Now identify the consecutive vals belonging to the same row */ 774 for (j = i, rstart = row[j]; j < n; j++) { 775 if (row[j] != rstart) break; 776 } 777 if (j < n) ncols = j - i; 778 else ncols = n - i; 779 /* Now assemble all these values with a single function call */ 780 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 781 i = j; 782 } 783 } 784 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 785 } 786 #if defined(PETSC_HAVE_DEVICE) 787 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 788 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 789 if (mat->boundtocpu) { 790 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 791 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 792 } 793 #endif 794 PetscCall(MatAssemblyBegin(aij->A, mode)); 795 PetscCall(MatAssemblyEnd(aij->A, mode)); 796 797 /* determine if any processor has disassembled, if so we must 798 also disassemble ourself, in order that we may reassemble. 
*/ 799 /* 800 if nonzero structure of submatrix B cannot change then we know that 801 no processor disassembled thus we can skip this stuff 802 */ 803 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 804 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 805 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 806 PetscCall(MatDisAssemble_MPIAIJ(mat)); 807 } 808 } 809 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 810 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 811 #if defined(PETSC_HAVE_DEVICE) 812 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 813 #endif 814 PetscCall(MatAssemblyBegin(aij->B, mode)); 815 PetscCall(MatAssemblyEnd(aij->B, mode)); 816 817 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 818 819 aij->rowvalues = NULL; 820 821 PetscCall(VecDestroy(&aij->diag)); 822 823 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 824 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 825 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 826 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 827 } 828 #if defined(PETSC_HAVE_DEVICE) 829 mat->offloadmask = PETSC_OFFLOAD_BOTH; 830 #endif 831 PetscFunctionReturn(PETSC_SUCCESS); 832 } 833 834 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 835 { 836 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 837 838 PetscFunctionBegin; 839 PetscCall(MatZeroEntries(l->A)); 840 PetscCall(MatZeroEntries(l->B)); 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const 
PetscInt rows[], PetscScalar diag, Vec x, Vec b) 845 { 846 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 847 PetscObjectState sA, sB; 848 PetscInt *lrows; 849 PetscInt r, len; 850 PetscBool cong, lch, gch; 851 852 PetscFunctionBegin; 853 /* get locally owned rows */ 854 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 855 PetscCall(MatHasCongruentLayouts(A, &cong)); 856 /* fix right hand side if needed */ 857 if (x && b) { 858 const PetscScalar *xx; 859 PetscScalar *bb; 860 861 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 862 PetscCall(VecGetArrayRead(x, &xx)); 863 PetscCall(VecGetArray(b, &bb)); 864 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 865 PetscCall(VecRestoreArrayRead(x, &xx)); 866 PetscCall(VecRestoreArray(b, &bb)); 867 } 868 869 sA = mat->A->nonzerostate; 870 sB = mat->B->nonzerostate; 871 872 if (diag != 0.0 && cong) { 873 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 874 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 875 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 876 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 877 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 878 PetscInt nnwA, nnwB; 879 PetscBool nnzA, nnzB; 880 881 nnwA = aijA->nonew; 882 nnwB = aijB->nonew; 883 nnzA = aijA->keepnonzeropattern; 884 nnzB = aijB->keepnonzeropattern; 885 if (!nnzA) { 886 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 887 aijA->nonew = 0; 888 } 889 if (!nnzB) { 890 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 891 aijB->nonew = 0; 892 } 893 /* Must zero here before the next loop */ 894 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 895 
PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 896 for (r = 0; r < len; ++r) { 897 const PetscInt row = lrows[r] + A->rmap->rstart; 898 if (row >= A->cmap->N) continue; 899 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 900 } 901 aijA->nonew = nnwA; 902 aijB->nonew = nnwB; 903 } else { 904 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 905 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 906 } 907 PetscCall(PetscFree(lrows)); 908 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 909 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 910 911 /* reduce nonzerostate */ 912 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 913 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 914 if (gch) A->nonzerostate++; 915 PetscFunctionReturn(PETSC_SUCCESS); 916 } 917 918 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 919 { 920 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 921 PetscMPIInt n = A->rmap->n; 922 PetscInt i, j, r, m, len = 0; 923 PetscInt *lrows, *owners = A->rmap->range; 924 PetscMPIInt p = 0; 925 PetscSFNode *rrows; 926 PetscSF sf; 927 const PetscScalar *xx; 928 PetscScalar *bb, *mask, *aij_a; 929 Vec xmask, lmask; 930 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 931 const PetscInt *aj, *ii, *ridx; 932 PetscScalar *aa; 933 934 PetscFunctionBegin; 935 /* Create SF where leaves are input rows and roots are owned rows */ 936 PetscCall(PetscMalloc1(n, &lrows)); 937 for (r = 0; r < n; ++r) lrows[r] = -1; 938 PetscCall(PetscMalloc1(N, &rrows)); 939 for (r = 0; r < N; ++r) { 940 const PetscInt idx = rows[r]; 941 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 942 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row 
too */ 943 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 944 } 945 rrows[r].rank = p; 946 rrows[r].index = rows[r] - owners[p]; 947 } 948 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 949 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 950 /* Collect flags for rows to be zeroed */ 951 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 952 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 953 PetscCall(PetscSFDestroy(&sf)); 954 /* Compress and put in row numbers */ 955 for (r = 0; r < n; ++r) 956 if (lrows[r] >= 0) lrows[len++] = r; 957 /* zero diagonal part of matrix */ 958 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 959 /* handle off-diagonal part of matrix */ 960 PetscCall(MatCreateVecs(A, &xmask, NULL)); 961 PetscCall(VecDuplicate(l->lvec, &lmask)); 962 PetscCall(VecGetArray(xmask, &bb)); 963 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 964 PetscCall(VecRestoreArray(xmask, &bb)); 965 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 966 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 967 PetscCall(VecDestroy(&xmask)); 968 if (x && b) { /* this code is buggy when the row and column layout don't match */ 969 PetscBool cong; 970 971 PetscCall(MatHasCongruentLayouts(A, &cong)); 972 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 973 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 974 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 975 PetscCall(VecGetArrayRead(l->lvec, &xx)); 976 PetscCall(VecGetArray(b, &bb)); 977 } 978 PetscCall(VecGetArray(lmask, &mask)); 979 /* remove zeroed rows of off-diagonal matrix */ 980 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 981 ii = aij->i; 982 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 983 /* loop over all elements of off process part of matrix zeroing removed columns*/ 984 if (aij->compressedrow.use) { 985 m = aij->compressedrow.nrows; 986 ii = aij->compressedrow.i; 987 ridx = aij->compressedrow.rindex; 988 for (i = 0; i < m; i++) { 989 n = ii[i + 1] - ii[i]; 990 aj = aij->j + ii[i]; 991 aa = aij_a + ii[i]; 992 993 for (j = 0; j < n; j++) { 994 if (PetscAbsScalar(mask[*aj])) { 995 if (b) bb[*ridx] -= *aa * xx[*aj]; 996 *aa = 0.0; 997 } 998 aa++; 999 aj++; 1000 } 1001 ridx++; 1002 } 1003 } else { /* do not use compressed row format */ 1004 m = l->B->rmap->n; 1005 for (i = 0; i < m; i++) { 1006 n = ii[i + 1] - ii[i]; 1007 aj = aij->j + ii[i]; 1008 aa = aij_a + ii[i]; 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[i] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 } 1018 } 1019 if (x && b) { 1020 PetscCall(VecRestoreArray(b, &bb)); 1021 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1022 } 1023 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1024 PetscCall(VecRestoreArray(lmask, &mask)); 1025 PetscCall(VecDestroy(&lmask)); 1026 PetscCall(PetscFree(lrows)); 1027 1028 /* only change matrix nonzero state if pattern was allowed to be changed */ 1029 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1030 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1031 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1032 } 1033 PetscFunctionReturn(PETSC_SUCCESS); 1034 } 1035 1036 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1037 { 1038 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1039 PetscInt nt; 1040 VecScatter Mvctx = a->Mvctx; 1041 1042 PetscFunctionBegin; 1043 PetscCall(VecGetLocalSize(xx, &nt)); 1044 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" 
PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1045 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1046 PetscUseTypeMethod(a->A, mult, xx, yy); 1047 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1048 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 1056 PetscFunctionBegin; 1057 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1058 PetscFunctionReturn(PETSC_SUCCESS); 1059 } 1060 1061 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1062 { 1063 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1064 VecScatter Mvctx = a->Mvctx; 1065 1066 PetscFunctionBegin; 1067 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1068 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1069 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1070 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1071 PetscFunctionReturn(PETSC_SUCCESS); 1072 } 1073 1074 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1075 { 1076 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1077 1078 PetscFunctionBegin; 1079 /* do nondiagonal part */ 1080 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1081 /* do local part */ 1082 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1083 /* add partial results together */ 1084 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1085 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1090 { 1091 MPI_Comm comm; 1092 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1093 Mat Adia = 
Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1094 IS Me, Notme; 1095 PetscInt M, N, first, last, *notme, i; 1096 PetscBool lf; 1097 PetscMPIInt size; 1098 1099 PetscFunctionBegin; 1100 /* Easy test: symmetric diagonal block */ 1101 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1102 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1103 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1104 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1105 PetscCallMPI(MPI_Comm_size(comm, &size)); 1106 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1107 1108 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1109 PetscCall(MatGetSize(Amat, &M, &N)); 1110 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1111 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1112 for (i = 0; i < first; i++) notme[i] = i; 1113 for (i = last; i < M; i++) notme[i - last + first] = i; 1114 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1115 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1116 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1117 Aoff = Aoffs[0]; 1118 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1119 Boff = Boffs[0]; 1120 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1121 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1122 PetscCall(MatDestroyMatrices(1, &Boffs)); 1123 PetscCall(ISDestroy(&Me)); 1124 PetscCall(ISDestroy(&Notme)); 1125 PetscCall(PetscFree(notme)); 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1130 { 1131 PetscFunctionBegin; 1132 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1133 PetscFunctionReturn(PETSC_SUCCESS); 1134 } 1135 1136 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1137 { 1138 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 
1139 1140 PetscFunctionBegin; 1141 /* do nondiagonal part */ 1142 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1143 /* do local part */ 1144 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1145 /* add partial results together */ 1146 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1147 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 /* 1152 This only works correctly for square matrices where the subblock A->A is the 1153 diagonal block 1154 */ 1155 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1161 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1162 PetscCall(MatGetDiagonal(a->A, v)); 1163 PetscFunctionReturn(PETSC_SUCCESS); 1164 } 1165 1166 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1167 { 1168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1169 1170 PetscFunctionBegin; 1171 PetscCall(MatScale(a->A, aa)); 1172 PetscCall(MatScale(a->B, aa)); 1173 PetscFunctionReturn(PETSC_SUCCESS); 1174 } 1175 1176 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1177 { 1178 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1179 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1180 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1181 const PetscInt *garray = aij->garray; 1182 const PetscScalar *aa, *ba; 1183 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1184 PetscInt64 nz, hnz; 1185 PetscInt *rowlens; 1186 PetscInt *colidxs; 1187 PetscScalar *matvals; 1188 PetscMPIInt rank; 1189 1190 PetscFunctionBegin; 1191 PetscCall(PetscViewerSetUp(viewer)); 1192 1193 M = 
mat->rmap->N; 1194 N = mat->cmap->N; 1195 m = mat->rmap->n; 1196 rs = mat->rmap->rstart; 1197 cs = mat->cmap->rstart; 1198 nz = A->nz + B->nz; 1199 1200 /* write matrix header */ 1201 header[0] = MAT_FILE_CLASSID; 1202 header[1] = M; 1203 header[2] = N; 1204 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1205 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1206 if (rank == 0) { 1207 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1208 else header[3] = (PetscInt)hnz; 1209 } 1210 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1211 1212 /* fill in and store row lengths */ 1213 PetscCall(PetscMalloc1(m, &rowlens)); 1214 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1215 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1216 PetscCall(PetscFree(rowlens)); 1217 1218 /* fill in and store column indices */ 1219 PetscCall(PetscMalloc1(nz, &colidxs)); 1220 for (cnt = 0, i = 0; i < m; i++) { 1221 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1222 if (garray[B->j[jb]] > cs) break; 1223 colidxs[cnt++] = garray[B->j[jb]]; 1224 } 1225 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1226 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1227 } 1228 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1229 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1230 PetscCall(PetscFree(colidxs)); 1231 1232 /* fill in and store nonzero values */ 1233 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1234 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1235 PetscCall(PetscMalloc1(nz, &matvals)); 1236 for (cnt = 0, i = 0; i < m; i++) { 1237 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1238 if (garray[B->j[jb]] > cs) break; 1239 matvals[cnt++] = ba[jb]; 1240 } 
1241 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1242 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1243 } 1244 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1245 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1246 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1247 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1248 PetscCall(PetscFree(matvals)); 1249 1250 /* write block size option to the viewer's .info file */ 1251 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 #include <petscdraw.h> 1256 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1259 PetscMPIInt rank = aij->rank, size = aij->size; 1260 PetscBool isdraw, iascii, isbinary; 1261 PetscViewer sviewer; 1262 PetscViewerFormat format; 1263 1264 PetscFunctionBegin; 1265 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1266 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1267 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1268 if (iascii) { 1269 PetscCall(PetscViewerGetFormat(viewer, &format)); 1270 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1271 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1272 PetscCall(PetscMalloc1(size, &nz)); 1273 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1274 for (i = 0; i < (PetscInt)size; i++) { 1275 nmax = PetscMax(nmax, nz[i]); 1276 nmin = PetscMin(nmin, nz[i]); 1277 navg += nz[i]; 1278 } 1279 PetscCall(PetscFree(nz)); 1280 navg = navg / size; 1281 PetscCall(PetscViewerASCIIPrintf(viewer, 
"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 PetscCall(PetscViewerGetFormat(viewer, &format)); 1285 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1286 MatInfo info; 1287 PetscInt *inodes = NULL; 1288 1289 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1290 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1291 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1292 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1293 if (!inodes) { 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1295 (double)info.memory)); 1296 } else { 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1298 (double)info.memory)); 1299 } 1300 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1301 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1302 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1303 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1304 PetscCall(PetscViewerFlush(viewer)); 1305 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1306 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1307 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1310 PetscInt inodecount, inodelimit, *inodes; 1311 
PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1312 if (inodes) { 1313 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1314 } else { 1315 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } 1321 } else if (isbinary) { 1322 if (size == 1) { 1323 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1324 PetscCall(MatView(aij->A, viewer)); 1325 } else { 1326 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1327 } 1328 PetscFunctionReturn(PETSC_SUCCESS); 1329 } else if (iascii && size == 1) { 1330 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1331 PetscCall(MatView(aij->A, viewer)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } else if (isdraw) { 1334 PetscDraw draw; 1335 PetscBool isnull; 1336 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1337 PetscCall(PetscDrawIsNull(draw, &isnull)); 1338 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1339 } 1340 1341 { /* assemble the entire matrix onto first processor */ 1342 Mat A = NULL, Av; 1343 IS isrow, iscol; 1344 1345 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1346 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1347 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1348 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1349 /* The commented code uses MatCreateSubMatrices instead */ 1350 /* 1351 Mat *AA, A = NULL, Av; 1352 IS isrow,iscol; 1353 1354 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1355 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1356 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1357 if (rank == 0) { 1358 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1359 A = AA[0]; 1360 Av = AA[0]; 1361 } 1362 PetscCall(MatDestroySubMatrices(1,&AA)); 1363 */ 1364 PetscCall(ISDestroy(&iscol)); 1365 PetscCall(ISDestroy(&isrow)); 1366 /* 1367 Everyone has to call to draw the matrix since the graphics waits are 1368 synchronized across all processors that share the PetscDraw object 1369 */ 1370 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1371 if (rank == 0) { 1372 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1373 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1374 } 1375 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1376 PetscCall(MatDestroy(&A)); 1377 } 1378 PetscFunctionReturn(PETSC_SUCCESS); 1379 } 1380 1381 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1382 { 1383 PetscBool iascii, isdraw, issocket, isbinary; 1384 1385 PetscFunctionBegin; 1386 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1387 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1388 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1389 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1390 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1391 PetscFunctionReturn(PETSC_SUCCESS); 1392 } 1393 1394 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1395 { 1396 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1397 Vec bb1 = NULL; 1398 PetscBool hasop; 1399 1400 
PetscFunctionBegin; 1401 if (flag == SOR_APPLY_UPPER) { 1402 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1403 PetscFunctionReturn(PETSC_SUCCESS); 1404 } 1405 1406 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1407 1408 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1409 if (flag & SOR_ZERO_INITIAL_GUESS) { 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1411 its--; 1412 } 1413 1414 while (its--) { 1415 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1416 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1417 1418 /* update rhs: bb1 = bb - B*x */ 1419 PetscCall(VecScale(mat->lvec, -1.0)); 1420 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1421 1422 /* local sweep */ 1423 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1424 } 1425 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1426 if (flag & SOR_ZERO_INITIAL_GUESS) { 1427 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1428 its--; 1429 } 1430 while (its--) { 1431 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1432 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1433 1434 /* update rhs: bb1 = bb - B*x */ 1435 PetscCall(VecScale(mat->lvec, -1.0)); 1436 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1437 1438 /* local sweep */ 1439 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1440 } 1441 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1444 its--; 1445 } 1446 while (its--) { 1447 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, 
SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily create and cache the diagonal; reused on subsequent calls */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - Builds B = P*A*Q for row permutation rowp and column permutation colp.

  The inverse permutations are computed with PetscSF reductions (each rank learns where its
  rows/columns land), the new diagonal/off-diagonal nonzero counts are broadcast back through
  the same row SF, and the permuted matrix is then filled with MatSetValues().
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is shared scratch for both row and column passes, hence PetscMax(m, n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  /* count, per local row, how many permuted entries land in the diagonal vs off-diagonal block */
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the ranks that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetGhosts_MPIAIJ - Returns the number of ghost (off-process) columns and, optionally,
   their global indices (the garray owned by the matrix; caller must not free). */
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetInfo_MPIAIJ - Gathers nonzero/memory statistics from the diagonal (A) and
   off-diagonal (B) blocks, then combines them locally or across ranks per 'flag'. */
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] +=
info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* per-rank maxima across the communicator */
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* global totals across the communicator */
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetOption_MPIAIJ - Sets an option on the parallel matrix; most options are simply
   forwarded to both sequential blocks (a->A and a->B). */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix, merging the
  diagonal-block (A) and off-diagonal-block (B) entries into a single, globally indexed,
  column-sorted row stored in the cached rowvalues/rowindices buffers.

  Only local rows (rstart <= row < rend) may be requested; only one row may be active
  at a time (guarded by getrowactive), and MatRestoreRow() must follow.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request only the outputs the caller asked for from the sub-block getrow's */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first, then all of A, then the rest of B */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow,
&nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRestoreRow_MPIAIJ - Ends the MatGetRow() transaction; the row buffers are cached
   on the matrix, so only the active flag needs to be cleared. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column) or infinity- (max row) norm.
  Local contributions from the diagonal (A) and off-diagonal (B) blocks are combined
  with an allreduce; NORM_2 is not supported.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-rank matrix: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE: accumulates a dense global-column vector of size cmap->N on every rank */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatTranspose_MPIAIJ - Computes B = A^T. The diagonal block is transposed locally
  (fast path via MatTranspose on a->A); off-diagonal entries are sent to their new
  owners with MatSetValues(). Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and
  in-place (*matout == A) modes.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na =
A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* transposed matrix: row/column layouts (and block sizes) are swapped */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* each local row of a->B becomes a (global) column of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's guts with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr). The right-scaling
  of the off-diagonal block needs the ghosted entries of rr, which are scattered while
  the local scaling proceeds.
*/
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetUnfactored_MPIAIJ - Clears the factored state; only the diagonal block carries
   factorization state here. */
static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatEqual_MPIAIJ - A and B are equal iff on every rank both the diagonal and
   off-diagonal blocks compare equal (combined with a logical-AND allreduce). */
static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatCopy_MPIAIJ - Copies A into B, using the fast per-block copy only when the
   nonzero patterns match and both matrices share the same copy implementation. */
static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    /* merge-count of the two sorted (in local-to-global terms) index lists */
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAXPY_MPIAIJ - Computes Y = a*X + Y. Same/subset patterns use the fast block-wise
  or basic paths; otherwise a new matrix with the merged pattern is preallocated,
  filled, and swapped into Y via MatHeaderMerge().
*/
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* MatConjugate_MPIAIJ - Complex-conjugates all entries; a no-op for real builds. */
static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
PetscFunctionReturn(PETSC_SUCCESS); 2138 } 2139 2140 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2143 2144 PetscFunctionBegin; 2145 PetscCall(MatRealPart(a->A)); 2146 PetscCall(MatRealPart(a->B)); 2147 PetscFunctionReturn(PETSC_SUCCESS); 2148 } 2149 2150 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatImaginaryPart(a->A)); 2156 PetscCall(MatImaginaryPart(a->B)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2163 PetscInt i, *idxb = NULL, m = A->rmap->n; 2164 PetscScalar *va, *vv; 2165 Vec vB, vA; 2166 const PetscScalar *vb; 2167 2168 PetscFunctionBegin; 2169 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2170 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2171 2172 PetscCall(VecGetArrayWrite(vA, &va)); 2173 if (idx) { 2174 for (i = 0; i < m; i++) { 2175 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2176 } 2177 } 2178 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2180 PetscCall(PetscMalloc1(m, &idxb)); 2181 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2182 2183 PetscCall(VecGetArrayWrite(v, &vv)); 2184 PetscCall(VecGetArrayRead(vB, &vb)); 2185 for (i = 0; i < m; i++) { 2186 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2187 vv[i] = vb[i]; 2188 if (idx) idx[i] = a->garray[idxb[i]]; 2189 } else { 2190 vv[i] = va[i]; 2191 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2192 } 2193 } 2194 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2195 PetscCall(VecRestoreArrayWrite(vA, &va)); 2196 PetscCall(VecRestoreArrayRead(vB, &vb)); 2197 PetscCall(PetscFree(idxb)); 2198 PetscCall(VecDestroy(&vA)); 2199 PetscCall(VecDestroy(&vB)); 2200 PetscFunctionReturn(PETSC_SUCCESS); 2201 } 2202 
/* MatGetRowSumAbs_MPIAIJ - For each local row, v[r] = sum of |entries| over both blocks. */
static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    m = A->rmap->n;
  Vec         vB, vA;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMinAbs_MPIAIJ - For each locally owned row, returns in v the entry of
  smallest absolute value and optionally its global column index in idx[].

  Because the off-diagonal block B stores only the nonzero (compressed) columns,
  a row that does not touch every off-process column has implicit zeros; the code
  scans the compressed column map (cmap/garray) for the first such "hole" so the
  implicit 0.0 competes in the minimum with an index to report.
*/
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: every row's min-abs is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now let the explicit off-diagonal entries compete with the implicit 0.0 */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine diagonal-block and off-diagonal-block minima; ties keep the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMin_MPIAIJ - For each locally owned row, returns in v the minimum entry
  (compared by real part) and optionally its global column index in idx[].
  Same implicit-zero "hole" handling for the compressed off-diagonal block as in
  MatGetRowMinAbs_MPIAIJ above.
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: report the identity of min (PETSC_MAX_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMax_MPIAIJ - For each locally owned row, returns in v the maximum entry
  (compared by real part) and optionally its global column index in idx[].
  Mirrors MatGetRowMin_MPIAIJ with the comparisons reversed.
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: report the identity of max (PETSC_MIN_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap (global column of the first implicit 0.0) */
      /* NOTE(review): compares global column `col` against loop index `j` — mirrors
         MatGetRowMin_MPIAIJ above; verify against upstream PETSc before changing */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the locally owned column range */
        }
      }
    }

    /* Fold the explicit B entries of this row into the running maximum */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block maxima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to mat->A; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (in *newmat) a sequential matrix holding the nonzero structure of the whole
   parallel matrix; values are not gathered (MAT_DO_NOT_GET_VALUES).  Ownership of the
   created matrix transfers to the caller. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy)); /* free only the array wrapper; the Mat itself is returned */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the (point-)block diagonal; delegates to the local diagonal block mat->A,
   which owns all diagonal entries, and propagates any factorization-error flag. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix with random values.  For a preallocated-but-unassembled matrix the
   off-diagonal block B still stores global column indices, so the locally owned column
   range must be skipped when randomizing B; once assembled, plain MatSetRandom suffices. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Plumbing for MatMPIAIJSetUseScalableIncreaseOverlap(): swap the increaseoverlap
   implementation between the scalable and the default algorithm. */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else
    A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* i[rmap->n] is the CSR row-pointer past-the-end entry, i.e. the local nonzero count of each block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op for types that do not compose the _C function */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process -mat_increase_overlap_scalable from the options database; default reflects the
   currently installed increaseoverlap implementation. */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I.  Ensures a minimal diagonal preallocation exists before delegating to
   MatShift_Basic, so inserting diagonal entries cannot fail on an empty matrix. */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the no-new-nonzeros option across re-preallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any diagonal entry is structurally missing; the diagonal lives entirely in
   the local block a->A, so delegate and shift the reported local row to a global row. */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the variable-sized block diagonal; all diagonal blocks live in a->A. */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop explicitly stored zero entries from both blocks; `keep` optionally preserves zero
   diagonal coefficients (meaningful only for the diagonal block A). */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Virtual function table for MATMPIAIJ.  The position of each entry is fixed by the
   MatOps struct layout (the numeric comments are positional markers) — do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};

/* Snapshot the current values of both blocks so they can be restored later with
   MatRetrieveValues(); exposed via the MatStoreValues_C composed function. */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously saved by MatStoreValues_MPIAIJ on both blocks. */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ
*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate the MPIAIJ matrix: (re)create the sequential diagonal block b->A and
   off-diagonal block b->B and preallocate each with the given per-row nonzero counts.
   Any previous column map, ghost data, and scatter context are destroyed because the
   nonzero structure is being redefined.  On one rank B is created with zero columns. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* sequential run: no off-process columns, so B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ?
2981 a = (Mat_MPIAIJ *)mat->data; 2982 2983 mat->factortype = matin->factortype; 2984 mat->assembled = matin->assembled; 2985 mat->insertmode = NOT_SET_VALUES; 2986 2987 a->size = oldmat->size; 2988 a->rank = oldmat->rank; 2989 a->donotstash = oldmat->donotstash; 2990 a->roworiented = oldmat->roworiented; 2991 a->rowindices = NULL; 2992 a->rowvalues = NULL; 2993 a->getrowactive = PETSC_FALSE; 2994 2995 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2996 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2997 if (matin->hash_active) { 2998 PetscCall(MatSetUp(mat)); 2999 } else { 3000 mat->preallocated = matin->preallocated; 3001 if (oldmat->colmap) { 3002 #if defined(PETSC_USE_CTABLE) 3003 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3004 #else 3005 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3006 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3007 #endif 3008 } else a->colmap = NULL; 3009 if (oldmat->garray) { 3010 PetscInt len; 3011 len = oldmat->B->cmap->n; 3012 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3013 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3014 } else a->garray = NULL; 3015 3016 /* It may happen MatDuplicate is called with a non-assembled matrix 3017 In fact, MatDuplicate only requires the matrix to be preallocated 3018 This may happen inside a DMCreateMatrix_Shell */ 3019 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3020 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3021 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3022 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3023 } 3024 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3025 *newmat = mat; 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3030 { 3031 PetscBool isbinary, ishdf5; 3032 3033 PetscFunctionBegin; 3034 
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIAIJ matrix from a PETSc binary viewer: read the header, distribute row
   lengths, read each rank's slab of column indices and values, then hand the local CSR
   data to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3]; /* negative nz flags a special on-disk format this loader cannot read */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_MAX_INT) { /* PETSC_MAX_INT in the header means "count unknown" — skip the consistency check */
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
/* Produce a sequential IS holding the (globally gathered) column indices of iscol.
   If every rank's iscol is exactly its owned column range, a cheap identity stride IS
   over all N columns is returned instead of gathering. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree (MPI_MIN) that their local piece is the owned stride */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
 */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.'
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3151 i.e., mat->rstart <= isrow[i] < mat->rend 3152 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3153 i.e., mat->cstart <= iscol[i] < mat->cend 3154 3155 Output Parameters: 3156 + isrow_d - sequential row index set for retrieving mat->A 3157 . iscol_d - sequential column index set for retrieving mat->A 3158 . iscol_o - sequential column index set for retrieving mat->B 3159 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3160 */ 3161 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3162 { 3163 Vec x, cmap; 3164 const PetscInt *is_idx; 3165 PetscScalar *xarray, *cmaparray; 3166 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3168 Mat B = a->B; 3169 Vec lvec = a->lvec, lcmap; 3170 PetscInt i, cstart, cend, Bn = B->cmap->N; 3171 MPI_Comm comm; 3172 VecScatter Mvctx = a->Mvctx; 3173 3174 PetscFunctionBegin; 3175 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3176 PetscCall(ISGetLocalSize(iscol, &ncols)); 3177 3178 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3179 PetscCall(MatCreateVecs(mat, &x, NULL)); 3180 PetscCall(VecSet(x, -1.0)); 3181 PetscCall(VecDuplicate(x, &cmap)); 3182 PetscCall(VecSet(cmap, -1.0)); 3183 3184 /* Get start indices */ 3185 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3186 isstart -= ncols; 3187 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3188 3189 PetscCall(ISGetIndices(iscol, &is_idx)); 3190 PetscCall(VecGetArray(x, &xarray)); 3191 PetscCall(VecGetArray(cmap, &cmaparray)); 3192 PetscCall(PetscMalloc1(ncols, &idx)); 3193 for (i = 0; i < ncols; i++) { 3194 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3195 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3196 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3197 } 3198 PetscCall(VecRestoreArray(x, &xarray)); 3199 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3200 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3201 3202 /* Get iscol_d */ 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3204 PetscCall(ISGetBlockSize(iscol, &i)); 3205 PetscCall(ISSetBlockSize(*iscol_d, i)); 3206 3207 /* Get isrow_d */ 3208 PetscCall(ISGetLocalSize(isrow, &m)); 3209 rstart = mat->rmap->rstart; 3210 PetscCall(PetscMalloc1(m, &idx)); 3211 PetscCall(ISGetIndices(isrow, &is_idx)); 3212 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3213 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3214 3215 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3216 PetscCall(ISGetBlockSize(isrow, &i)); 3217 PetscCall(ISSetBlockSize(*isrow_d, i)); 3218 3219 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3220 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3222 3223 PetscCall(VecDuplicate(lvec, &lcmap)); 3224 3225 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3227 3228 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3229 /* off-process column indices */ 3230 count = 0; 3231 PetscCall(PetscMalloc1(Bn, &idx)); 3232 PetscCall(PetscMalloc1(Bn, &cmap1)); 3233 3234 PetscCall(VecGetArray(lvec, &xarray)); 3235 PetscCall(VecGetArray(lcmap, &cmaparray)); 3236 for (i = 0; i < Bn; i++) { 3237 if (PetscRealPart(xarray[i]) > -1.0) { 3238 idx[count] = i; /* local column index in off-diagonal part B */ 3239 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3240 count++; 3241 } 3242 } 3243 PetscCall(VecRestoreArray(lvec, &xarray)); 3244 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3245 3246 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3247 /* cannot ensure iscol_o has same blocksize as iscol! */ 3248 3249 PetscCall(PetscFree(idx)); 3250 *garray = cmap1; 3251 3252 PetscCall(VecDestroy(&x)); 3253 PetscCall(VecDestroy(&cmap)); 3254 PetscCall(VecDestroy(&lcmap)); 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3260 { 3261 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3262 Mat M = NULL; 3263 MPI_Comm comm; 3264 IS iscol_d, isrow_d, iscol_o; 3265 Mat Asub = NULL, Bsub = NULL; 3266 PetscInt n; 3267 3268 PetscFunctionBegin; 3269 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3270 3271 if (call == MAT_REUSE_MATRIX) { 3272 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3274 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3277 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3280 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3281 3282 /* Update diagonal and off-diagonal portions of submat */ 3283 asub = (Mat_MPIAIJ *)(*submat)->data; 3284 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3285 PetscCall(ISGetLocalSize(iscol_o, &n)); 3286 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3287 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3288 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3289 3290 } else { /* call == MAT_INITIAL_MATRIX) */ 3291 const PetscInt *garray; 3292 PetscInt BsubN; 3293 3294 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3295 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3296 3297 /* Create local submatrices Asub and Bsub */ 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3299 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3300 3301 /* Create submatrix M */ 3302 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3303 3304 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3305 asub = (Mat_MPIAIJ *)M->data; 3306 3307 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3308 n = asub->B->cmap->N; 3309 if (BsubN > n) { 3310 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3311 const PetscInt *idx; 3312 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3313 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3314 3315 PetscCall(PetscMalloc1(n, &idx_new)); 3316 j = 0; 3317 PetscCall(ISGetIndices(iscol_o, &idx)); 3318 for (i = 0; i < n; i++) { 3319 if (j >= BsubN) break; 3320 while (subgarray[i] > garray[j]) j++; 3321 3322 if (subgarray[i] == garray[j]) { 3323 idx_new[i] = idx[j++]; 3324 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3325 } 3326 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3327 3328 PetscCall(ISDestroy(&iscol_o)); 3329 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3330 3331 } else if (BsubN < n) { 3332 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3333 } 3334 3335 PetscCall(PetscFree(garray)); 3336 *submat = M; 3337 3338 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatCreateSubMatrix_MPIAIJ - MATMPIAIJ implementation of MatCreateSubMatrix().

   Dispatches to one of three extraction paths:
     (1) isrow AND iscol match mat's row/column ownership ranges on every rank
         -> MatCreateSubMatrix_MPIAIJ_SameRowColDist() (fastest, purely local),
     (2) only isrow matches mat's row ownership, and the gathered iscol is sorted
         -> MatCreateSubMatrix_MPIAIJ_SameRowDist(),
     (3) otherwise the general path MatCreateSubMatrix_MPIAIJ_nonscalable(), which
         gathers iscol onto every process (memory scales with global iscol size).

   On MAT_REUSE_MATRIX the path used by the original MAT_INITIAL_MATRIX call is
   detected from the IS objects composed on *newmat ("isrow_d", "SubIScol",
   "ISAllGather") rather than re-deriving it, so the same path is taken again. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Reuse: detect which path created *newmat from the IS objects composed on it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE; /* empty local IS trivially fits any ownership range */
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* MPI_LAND: the fast paths are only taken when the distribution matches on EVERY rank,
       so all ranks agree on the (collective) routine called below */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* not sorted: fall through to the general path below; iscol_local is reused there */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the result so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. A - "diagonal" portion of matrix
. B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; *mat takes ownership of A (no reference increment) */
  maij->A = A;

  /* Relabel B's column indices in place from B's compact local numbering to the
     global numbering given by garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew adopts (shares) B's i/j/a arrays,
     since MatCreateSeqAIJWithArrays() does not copy them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer array ownership from B to Bnew so destroying B below does not free them */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *);

/* MatCreateSubMatrix_MPIAIJ_SameRowDist - submatrix extraction used when isrow matches
   mat's row ownership on every rank (see dispatcher MatCreateSubMatrix_MPIAIJ()).

   iscol_local is the sequential gather of iscol; it must be sorted but may contain
   duplicate indices (checked/produced by the caller). On MAT_INITIAL_MATRIX the
   intermediate objects ("SubMatrix", "SubIScol", "Subcmap") are composed on *newmat
   so a later MAT_REUSE_MATRIX call can retrieve them; in that case iscol_local may
   be passed as NULL. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the objects stashed by the previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* collective agreement: the special case is only taken if it holds on every rank */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* keep only the requested columns this rank actually stores: those in the
         diagonal ownership range [cstart,cend) plus those present in garray
         (merge-walk over the sorted garray using cursor k) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread Ncols columns as evenly as possible; first (Ncols % size) ranks get one extra */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column ownership range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* single allocation: olens is the second half of dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* map Msub's local column indices back to submatrix-global columns via cmap */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),
"ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* collective agreement across ranks: all must take the same code path */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* sequential intermediate matrix was stashed on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread n columns as evenly as possible; first (n % size) ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column ownership range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* single allocation: olens is the second half of dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* walk CSR row i of Mreuse; PetscSafePointerPlusOffset avoids UB when nz == 0 and the base is NULL */
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMPIAIJSetPreallocationCSR_MPIAIJ - implementation behind MatMPIAIJSetPreallocationCSR().
   Preallocates B from the local CSR arrays (Ii, J, v), copies the values in, assembles,
   and records per-row counts of entries left of the diagonal block in Aij->ld. */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* input validation runs only in debug builds; release builds trust the CSR arrays */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count per-row nonzeros in the diagonal block ([cstart,cend)) vs off-diagonal for preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* copy the CSR data into B, row by row (v may be NULL: structure-only preallocation) */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
  }
  /* all inserted rows are locally owned, so skip the off-process stash during assembly;
     restore the user's setting afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* ld[i] = number of leading entries of row i with global column index < cstart
       (assumes each row's column indices place the below-diagonal entries first) */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering..
  i.e for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (no-op if B's type does not provide one) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
  (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
  DIAGONAL portion of the local submatrix (possibly different for each row)
  or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
  The size of this array is equal to the number of local rows, i.e 'm'.
  For matrices that will be factored, you must leave room for (and set)
  the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
  submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
  OFF-DIAGONAL portion of the local submatrix (possibly different for
  each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
  structure. The size of this array is equal to the number
  of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation (no-op if B's type does not provide one) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is 4192 as shown 4193 .vb 4194 1 0 0 4195 2 0 3 P0 4196 ------- 4197 4 5 6 P1 4198 4199 Process0 [P0] rows_owned=[0,1] 4200 i = {0,1,3} [size = nrow+1 = 2+1] 4201 j = {0,0,2} [size = 3] 4202 v = {1,2,3} [size = 3] 4203 4204 Process1 [P1] rows_owned=[2] 4205 i = {0,3} [size = nrow+1 = 1+1] 4206 j = {0,1,2} [size = 3] 4207 v = {4,5,6} [size = 3] 4208 .ve 4209 4210 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4211 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4212 @*/ 4213 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4214 { 4215 PetscFunctionBegin; 4216 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4217 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4218 PetscCall(MatCreate(comm, mat)); 4219 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4220 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4221 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4222 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4223 PetscFunctionReturn(PETSC_SUCCESS); 4224 } 4225 4226 /*@ 4227 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4228 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4229 from `MatCreateMPIAIJWithArrays()` 4230 4231 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4232 4233 Collective 4234 4235 Input Parameters: 4236 + mat - the matrix 4237 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4238 . 
n - This value should be the same as the local size used in creating the 4239 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4240 calculated if N is given) For square matrices n is almost always m. 4241 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4242 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4243 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4244 . J - column indices 4245 - v - matrix values 4246 4247 Level: deprecated 4248 4249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4250 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4251 @*/ 4252 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4253 { 4254 PetscInt nnz, i; 4255 PetscBool nooffprocentries; 4256 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4257 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4258 PetscScalar *ad, *ao; 4259 PetscInt ldi, Iii, md; 4260 const PetscInt *Adi = Ad->i; 4261 PetscInt *ld = Aij->ld; 4262 4263 PetscFunctionBegin; 4264 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4265 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4266 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4267 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4268 4269 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, 
&ad)); 4270 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4271 4272 for (i = 0; i < m; i++) { 4273 if (PetscDefined(USE_DEBUG)) { 4274 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4275 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4276 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4277 } 4278 } 4279 nnz = Ii[i + 1] - Ii[i]; 4280 Iii = Ii[i]; 4281 ldi = ld[i]; 4282 md = Adi[i + 1] - Adi[i]; 4283 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4284 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4285 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4286 ad += md; 4287 ao += nnz - md; 4288 } 4289 nooffprocentries = mat->nooffprocentries; 4290 mat->nooffprocentries = PETSC_TRUE; 4291 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4292 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4293 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4294 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4296 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4297 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4298 mat->nooffprocentries = nooffprocentries; 4299 PetscFunctionReturn(PETSC_SUCCESS); 4300 } 4301 4302 /*@ 4303 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4304 4305 Collective 4306 4307 Input Parameters: 4308 + mat - the matrix 4309 - v - matrix values, stored by row 4310 4311 Level: intermediate 4312 4313 Notes: 4314 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4315 4316 The column indices in the call 
to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4317 4318 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4319 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4320 @*/ 4321 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4322 { 4323 PetscInt nnz, i, m; 4324 PetscBool nooffprocentries; 4325 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4326 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4327 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4328 PetscScalar *ad, *ao; 4329 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4330 PetscInt ldi, Iii, md; 4331 PetscInt *ld = Aij->ld; 4332 4333 PetscFunctionBegin; 4334 m = mat->rmap->n; 4335 4336 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4337 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4338 Iii = 0; 4339 for (i = 0; i < m; i++) { 4340 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4341 ldi = ld[i]; 4342 md = Adi[i + 1] - Adi[i]; 4343 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4344 ad += md; 4345 if (ao) { 4346 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4347 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4348 ao += nnz - md; 4349 } 4350 Iii += nnz; 4351 } 4352 nooffprocentries = mat->nooffprocentries; 4353 mat->nooffprocentries = PETSC_TRUE; 4354 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4355 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4356 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4357 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4358 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4359 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4360 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4361 mat->nooffprocentries = nooffprocentries; 4362 
PetscFunctionReturn(PETSC_SUCCESS); 4363 } 4364 4365 /*@C 4366 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4367 (the default parallel PETSc format). For good matrix assembly performance 4368 the user should preallocate the matrix storage by setting the parameters 4369 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4370 4371 Collective 4372 4373 Input Parameters: 4374 + comm - MPI communicator 4375 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4376 This value should be the same as the local size used in creating the 4377 y vector for the matrix-vector product y = Ax. 4378 . n - This value should be the same as the local size used in creating the 4379 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4380 calculated if N is given) For square matrices n is almost always m. 4381 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4382 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4383 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4384 (same value is used for all local rows) 4385 . d_nnz - array containing the number of nonzeros in the various rows of the 4386 DIAGONAL portion of the local submatrix (possibly different for each row) 4387 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4388 The size of this array is equal to the number of local rows, i.e 'm'. 4389 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4390 submatrix (same value is used for all local rows). 4391 - o_nnz - array containing the number of nonzeros in the various rows of the 4392 OFF-DIAGONAL portion of the local submatrix (possibly different for 4393 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4394 structure. The size of this array is equal to the number 4395 of local rows, i.e 'm'. 4396 4397 Output Parameter: 4398 . 
A - the matrix

  Options Database Keys:
+ -mat_no_inode                     - Do not use inodes
. -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
          See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
          Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2 etc.. where
  m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitutes the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc.
This 4519 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4520 for proc3. i.e we are using 12+15+10=37 storage locations to store 4521 34 values. 4522 4523 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4524 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4525 In the above case the values for d_nnz,o_nnz are 4526 .vb 4527 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4528 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4529 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4530 .ve 4531 Here the space allocated is sum of all the above values i.e 34, and 4532 hence pre-allocation is perfect. 4533 4534 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4535 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4536 @*/ 4537 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4538 { 4539 PetscMPIInt size; 4540 4541 PetscFunctionBegin; 4542 PetscCall(MatCreate(comm, A)); 4543 PetscCall(MatSetSizes(*A, m, n, M, N)); 4544 PetscCallMPI(MPI_Comm_size(comm, &size)); 4545 if (size > 1) { 4546 PetscCall(MatSetType(*A, MATMPIAIJ)); 4547 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4548 } else { 4549 PetscCall(MatSetType(*A, MATSEQAIJ)); 4550 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4551 } 4552 PetscFunctionReturn(PETSC_SUCCESS); 4553 } 4554 4555 /*MC 4556 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4557 4558 Synopsis: 4559 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4560 4561 Not Collective 4562 4563 Input Parameter: 4564 . A - the `MATMPIAIJ` matrix 4565 4566 Output Parameters: 4567 + Ad - the diagonal portion of the matrix 4568 . 
Ao     - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr   - error code

  Level: advanced

  Note:
  Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
M*/

/*MC
  MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`

  Synopsis:
  MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameters:
+ A      - the `MATMPIAIJ` matrix
. Ad     - the diagonal portion of the matrix
. Ao     - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr   - error code

  Level: advanced

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
M*/

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - The local diagonal block as a `MATSEQAIJ` matrix
. Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns.
The columns are `Ao` are in [0, Nco), where Nco is 4618 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4619 local column numbers to global column numbers in the original matrix. 4620 4621 Fortran Notes: 4622 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4623 4624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4625 @*/ 4626 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4627 { 4628 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4629 PetscBool flg; 4630 4631 PetscFunctionBegin; 4632 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4633 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4634 if (Ad) *Ad = a->A; 4635 if (Ao) *Ao = a->B; 4636 if (colmap) *colmap = a->garray; 4637 PetscFunctionReturn(PETSC_SUCCESS); 4638 } 4639 4640 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4641 { 4642 PetscInt m, N, i, rstart, nnz, Ii; 4643 PetscInt *indx; 4644 PetscScalar *values; 4645 MatType rootType; 4646 4647 PetscFunctionBegin; 4648 PetscCall(MatGetSize(inmat, &m, &N)); 4649 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4650 PetscInt *dnz, *onz, sum, bs, cbs; 4651 4652 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4653 /* Check sum(n) = N */ 4654 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4655 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4656 4657 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4658 rstart -= m; 4659 4660 MatPreallocateBegin(comm, 
m, n, dnz, onz); 4661 for (i = 0; i < m; i++) { 4662 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4663 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4664 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4665 } 4666 4667 PetscCall(MatCreate(comm, outmat)); 4668 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4669 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4670 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4671 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4672 PetscCall(MatSetType(*outmat, rootType)); 4673 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4674 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4675 MatPreallocateEnd(dnz, onz); 4676 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4677 } 4678 4679 /* numeric phase */ 4680 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4681 for (i = 0; i < m; i++) { 4682 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4683 Ii = i + rstart; 4684 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4685 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4686 } 4687 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4688 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4689 PetscFunctionReturn(PETSC_SUCCESS); 4690 } 4691 4692 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4693 { 4694 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4695 4696 PetscFunctionBegin; 4697 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4698 PetscCall(PetscFree(merge->id_r)); 4699 PetscCall(PetscFree(merge->len_s)); 4700 PetscCall(PetscFree(merge->len_r)); 4701 PetscCall(PetscFree(merge->bi)); 4702 PetscCall(PetscFree(merge->bj)); 4703 PetscCall(PetscFree(merge->buf_ri[0])); 4704 PetscCall(PetscFree(merge->buf_ri)); 4705 PetscCall(PetscFree(merge->buf_rj[0])); 4706 PetscCall(PetscFree(merge->buf_rj)); 4707 
PetscCall(PetscFree(merge->coi)); 4708 PetscCall(PetscFree(merge->coj)); 4709 PetscCall(PetscFree(merge->owners_co)); 4710 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4711 PetscCall(PetscFree(merge)); 4712 PetscFunctionReturn(PETSC_SUCCESS); 4713 } 4714 4715 #include <../src/mat/utils/freespace.h> 4716 #include <petscbt.h> 4717 4718 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4719 { 4720 MPI_Comm comm; 4721 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4722 PetscMPIInt size, rank, taga, *len_s; 4723 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4724 PetscInt proc, m; 4725 PetscInt **buf_ri, **buf_rj; 4726 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4727 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4728 MPI_Request *s_waits, *r_waits; 4729 MPI_Status *status; 4730 const MatScalar *aa, *a_a; 4731 MatScalar **abuf_r, *ba_i; 4732 Mat_Merge_SeqsToMPI *merge; 4733 PetscContainer container; 4734 4735 PetscFunctionBegin; 4736 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4737 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4738 4739 PetscCallMPI(MPI_Comm_size(comm, &size)); 4740 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4741 4742 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4743 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4744 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4745 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4746 aa = a_a; 4747 4748 bi = merge->bi; 4749 bj = merge->bj; 4750 buf_ri = merge->buf_ri; 4751 buf_rj = merge->buf_rj; 4752 4753 PetscCall(PetscMalloc1(size, &status)); 4754 owners = merge->rowmap->range; 4755 len_s = merge->len_s; 4756 4757 /* send and recv matrix values */ 4758 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4759 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, 
merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4760 4761 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4762 for (proc = 0, k = 0; proc < size; proc++) { 4763 if (!len_s[proc]) continue; 4764 i = owners[proc]; 4765 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4766 k++; 4767 } 4768 4769 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4770 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4771 PetscCall(PetscFree(status)); 4772 4773 PetscCall(PetscFree(s_waits)); 4774 PetscCall(PetscFree(r_waits)); 4775 4776 /* insert mat values of mpimat */ 4777 PetscCall(PetscMalloc1(N, &ba_i)); 4778 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4779 4780 for (k = 0; k < merge->nrecv; k++) { 4781 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4782 nrows = *(buf_ri_k[k]); 4783 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4784 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4785 } 4786 4787 /* set values of ba */ 4788 m = merge->rowmap->n; 4789 for (i = 0; i < m; i++) { 4790 arow = owners[rank] + i; 4791 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4792 bnzi = bi[i + 1] - bi[i]; 4793 PetscCall(PetscArrayzero(ba_i, bnzi)); 4794 4795 /* add local non-zero vals of this proc's seqmat into ba */ 4796 anzi = ai[arow + 1] - ai[arow]; 4797 aj = a->j + ai[arow]; 4798 aa = a_a + ai[arow]; 4799 nextaj = 0; 4800 for (j = 0; nextaj < anzi; j++) { 4801 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4802 ba_i[j] += aa[nextaj++]; 4803 } 4804 } 4805 4806 /* add received vals into ba */ 4807 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4808 /* i-th row */ 4809 if (i == *nextrow[k]) { 4810 anzi = *(nextai[k] + 1) - *nextai[k]; 4811 aj = buf_rj[k] + *(nextai[k]); 4812 aa = abuf_r[k] + 
*(nextai[k]);
        nextaj = 0;
        /* merge the received row into ba_i: bj_i is a superset of aj, so advance j until columns match */
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of MatCreateMPIAIJSumSeqAIJ().

  Determines the nonzero structure of the parallel sum of the per-process sequential
  matrices: each process sends the j-structure (column indices) and a compressed
  i-structure of the rows owned by other processes, merges the received structures with
  its own rows through a sorted linked list, and preallocates the resulting MATMPIAIJ.
  The intermediate data (bi/bj, receive buffers) are kept in a Mat_Merge_SeqsToMPI
  struct composed on B_mpi as "MatMergeSeqsToMPI" so the numeric phase can reuse them.
  The returned matrix has assembled == PETSC_FALSE; MatCreateMPIAIJSumSeqAIJNumeric()
  must be called before it is usable.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows; the i-structure message is 2*(nrows+1) entries */
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
       buf_si[0]: nrows to be sent
       [1:nrows]: row index (global)
       [nrows+1:2*nrows+1]: i-structure index
    */
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
  matrices from each processor

  Collective

  Input Parameters:
+ comm - the communicators the parallel matrix will live on
. seqmat - the input sequential matrices
. m - number of local rows (or `PETSC_DECIDE`)
. n - number of local columns (or `PETSC_DECIDE`)
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. mpimat - the parallel matrix generated

  Level: advanced

  Note:
  The dimensions of the sequential matrix in each processor MUST be the same.
  The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5095 5096 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5097 @*/ 5098 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5099 { 5100 PetscMPIInt size; 5101 5102 PetscFunctionBegin; 5103 PetscCallMPI(MPI_Comm_size(comm, &size)); 5104 if (size == 1) { 5105 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5106 if (scall == MAT_INITIAL_MATRIX) { 5107 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5108 } else { 5109 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5110 } 5111 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5112 PetscFunctionReturn(PETSC_SUCCESS); 5113 } 5114 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5115 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5116 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5117 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5118 PetscFunctionReturn(PETSC_SUCCESS); 5119 } 5120 5121 /*@ 5122 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5123 5124 Not Collective 5125 5126 Input Parameter: 5127 . A - the matrix 5128 5129 Output Parameter: 5130 . A_loc - the local sequential matrix generated 5131 5132 Level: developer 5133 5134 Notes: 5135 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5136 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5137 `n` is the global column count obtained with `MatGetSize()` 5138 5139 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5140 5141 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5142 5143 Destroy the matrix with `MatDestroy()` 5144 5145 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5146 @*/ 5147 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5148 { 5149 PetscBool mpi; 5150 5151 PetscFunctionBegin; 5152 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5153 if (mpi) { 5154 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5155 } else { 5156 *A_loc = A; 5157 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5158 } 5159 PetscFunctionReturn(PETSC_SUCCESS); 5160 } 5161 5162 /*@ 5163 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5164 5165 Not Collective 5166 5167 Input Parameters: 5168 + A - the matrix 5169 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5170 5171 Output Parameter: 5172 . A_loc - the local sequential matrix generated 5173 5174 Level: developer 5175 5176 Notes: 5177 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5178 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5179 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5180 5181 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5182 5183 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5184 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5185 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5186 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already is the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result has all of A's diag and off-diag entries for that row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* emit each row in ascending global column order:
       off-diag cols < cstart, then diag cols (shifted by cstart), then off-diag cols >= cstart */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists; refill values in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ`
matrix by taking all its local rows and putting them into a sequential matrix with 5297 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5298 5299 Not Collective 5300 5301 Input Parameters: 5302 + A - the matrix 5303 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5304 5305 Output Parameters: 5306 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5307 - A_loc - the local sequential matrix generated 5308 5309 Level: developer 5310 5311 Note: 5312 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5313 part, then those associated with the off-diagonal part (in its local ordering) 5314 5315 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5316 @*/ 5317 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5318 { 5319 Mat Ao, Ad; 5320 const PetscInt *cmap; 5321 PetscMPIInt size; 5322 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5323 5324 PetscFunctionBegin; 5325 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5326 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5327 if (size == 1) { 5328 if (scall == MAT_INITIAL_MATRIX) { 5329 PetscCall(PetscObjectReference((PetscObject)Ad)); 5330 *A_loc = Ad; 5331 } else if (scall == MAT_REUSE_MATRIX) { 5332 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5333 } 5334 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5335 PetscFunctionReturn(PETSC_SUCCESS); 5336 } 5337 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5338 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5339 if (f) { 5340 PetscCall((*f)(A, scall, glob, A_loc)); 5341 } else { 5342 
Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5343 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5344 Mat_SeqAIJ *c; 5345 PetscInt *ai = a->i, *aj = a->j; 5346 PetscInt *bi = b->i, *bj = b->j; 5347 PetscInt *ci, *cj; 5348 const PetscScalar *aa, *ba; 5349 PetscScalar *ca; 5350 PetscInt i, j, am, dn, on; 5351 5352 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5353 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5354 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5355 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5356 if (scall == MAT_INITIAL_MATRIX) { 5357 PetscInt k; 5358 PetscCall(PetscMalloc1(1 + am, &ci)); 5359 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5360 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5361 ci[0] = 0; 5362 for (i = 0, k = 0; i < am; i++) { 5363 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5364 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5365 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5366 /* diagonal portion of A */ 5367 for (j = 0; j < ncols_d; j++, k++) { 5368 cj[k] = *aj++; 5369 ca[k] = *aa++; 5370 } 5371 /* off-diagonal portion of A */ 5372 for (j = 0; j < ncols_o; j++, k++) { 5373 cj[k] = dn + *bj++; 5374 ca[k] = *ba++; 5375 } 5376 } 5377 /* put together the new matrix */ 5378 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5379 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5380 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5381 c = (Mat_SeqAIJ *)(*A_loc)->data; 5382 c->free_a = PETSC_TRUE; 5383 c->free_ij = PETSC_TRUE; 5384 c->nonew = 0; 5385 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5386 } else if (scall == MAT_REUSE_MATRIX) { 5387 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5388 for (i = 0; i < am; i++) { 5389 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5390 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5391 /* diagonal portion of A */ 5392 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5393 /* off-diagonal portion of A */ 5394 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5395 } 5396 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5397 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5398 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5399 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5400 if (glob) { 5401 PetscInt cst, *gidx; 5402 5403 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5404 PetscCall(PetscMalloc1(dn + on, &gidx)); 5405 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5406 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5407 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5408 } 5409 } 5410 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5411 PetscFunctionReturn(PETSC_SUCCESS); 5412 } 5413 5414 /*@C 5415 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5416 5417 Not Collective 5418 5419 Input Parameters: 5420 + A - the matrix 5421 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5422 . row - index set of rows to extract (or `NULL`) 5423 - col - index set of columns to extract (or `NULL`) 5424 5425 Output Parameter: 5426 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: every column with a local nonzero, in ascending global order:
       off-diag columns below cstart, then all owned columns, then the remaining off-diag columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects a user-provided array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices; a whole row is extracted once its
 * index is matched. Rows could be local or remote. The routine is designed to be scalable
 * in memory so that nothing is sized by a global dimension.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per root row, two counters: [0] diag nnz, [1] off-diag nnz, plus running offsets */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* prefix sums: relative location of each row's data within the diag/off-diag arrays */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs moves the actual column indices and values:
     roots are P's raw diag/off-diag arrays, leaves are contiguous slots in P_oth */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; converted back below after the Bcast completes */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* in-place local->global translation of the off-diag column indices */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place translation of po->j; every index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof > 1 collapses MAIJ block columns onto one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5699 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5700 PetscCall(PetscCalloc1(htsize, &rowindices)); 5701 off = 0; 5702 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5703 PetscCall(PetscHMapIDestroy(&hamp)); 5704 PetscCall(PetscSortInt(htsize, rowindices)); 5705 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5706 /* In case, the matrix was already created but users want to recreate the matrix */ 5707 PetscCall(MatDestroy(P_oth)); 5708 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5709 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5710 PetscCall(ISDestroy(&map)); 5711 PetscCall(ISDestroy(&rows)); 5712 } else if (reuse == MAT_REUSE_MATRIX) { 5713 /* If matrix was already created, we simply update values using SF objects 5714 * that as attached to the matrix earlier. 
5715 */ 5716 const PetscScalar *pd_a, *po_a; 5717 5718 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5719 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5720 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5721 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5722 /* Update values in place */ 5723 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5724 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5725 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5726 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5727 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5728 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5729 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5730 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5731 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5732 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5733 PetscFunctionReturn(PETSC_SUCCESS); 5734 } 5735 5736 /*@C 5737 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5738 5739 Collective 5740 5741 Input Parameters: 5742 + A - the first matrix in `MATMPIAIJ` format 5743 . B - the second matrix in `MATMPIAIJ` format 5744 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5745 5746 Output Parameters: 5747 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5748 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted global row list of B: off-diag columns of A below the local range,
       then all local rows, then off-diag columns above the local range (a->garray is sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of Mat on reuse; wrap the single output matrix */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller when requested, otherwise release them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5826 5827 Level: developer 5828 5829 */ 5830 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5831 { 5832 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5833 Mat_SeqAIJ *b_oth; 5834 VecScatter ctx; 5835 MPI_Comm comm; 5836 const PetscMPIInt *rprocs, *sprocs; 5837 const PetscInt *srow, *rstarts, *sstarts; 5838 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5839 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5840 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5841 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5842 PetscMPIInt size, tag, rank, nreqs; 5843 5844 PetscFunctionBegin; 5845 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5846 PetscCallMPI(MPI_Comm_size(comm, &size)); 5847 5848 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5849 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5850 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5851 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5852 5853 if (size == 1) { 5854 startsj_s = NULL; 5855 bufa_ptr = NULL; 5856 *B_oth = NULL; 5857 PetscFunctionReturn(PETSC_SUCCESS); 5858 } 5859 5860 ctx = a->Mvctx; 5861 tag = ((PetscObject)ctx)->tag; 5862 5863 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5864 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5865 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5866 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5867 PetscCall(PetscMalloc1(nreqs, &reqs)); 5868 rwaits = reqs; 5869 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5870 5871 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5872 if (scall == MAT_INITIAL_MATRIX) { 5873 /* i-array */ 5874 /* post receives */ 5875 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5876 for (i = 0; i < nrecvs; i++) { 5877 rowlen = rvalues + rstarts[i] * rbs; 5878 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5879 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5880 } 5881 5882 /* pack the outgoing message */ 5883 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5884 5885 sstartsj[0] = 0; 5886 rstartsj[0] = 0; 5887 len = 0; /* total length of j or a array to be sent */ 5888 if (nsends) { 5889 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5890 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5891 } 5892 for (i = 0; i < nsends; i++) { 5893 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5894 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5895 for (j = 0; j < nrows; j++) { 5896 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5897 for (l = 0; l < sbs; l++) { 5898 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5899 5900 rowlen[j * sbs + l] = ncols; 5901 5902 len += ncols; 5903 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5904 } 5905 k++; 5906 } 5907 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5908 5909 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5910 } 5911 /* recvs and sends of i-array are completed */ 5912 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5913 
PetscCall(PetscFree(svalues)); 5914 5915 /* allocate buffers for sending j and a arrays */ 5916 PetscCall(PetscMalloc1(len + 1, &bufj)); 5917 PetscCall(PetscMalloc1(len + 1, &bufa)); 5918 5919 /* create i-array of B_oth */ 5920 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5921 5922 b_othi[0] = 0; 5923 len = 0; /* total length of j or a array to be received */ 5924 k = 0; 5925 for (i = 0; i < nrecvs; i++) { 5926 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5927 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5928 for (j = 0; j < nrows; j++) { 5929 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5930 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5931 k++; 5932 } 5933 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5934 } 5935 PetscCall(PetscFree(rvalues)); 5936 5937 /* allocate space for j and a arrays of B_oth */ 5938 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5939 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5940 5941 /* j-array */ 5942 /* post receives of j-array */ 5943 for (i = 0; i < nrecvs; i++) { 5944 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5945 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5946 } 5947 5948 /* pack the outgoing message j-array */ 5949 if (nsends) k = sstarts[0]; 5950 for (i = 0; i < nsends; i++) { 5951 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5952 bufJ = bufj + sstartsj[i]; 5953 for (j = 0; j < nrows; j++) { 5954 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5955 for (ll = 0; ll < sbs; ll++) { 5956 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5957 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5958 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5959 } 5960 } 5961 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5962 } 5963 5964 /* recvs 
and sends of j-array are completed */ 5965 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5966 } else if (scall == MAT_REUSE_MATRIX) { 5967 sstartsj = *startsj_s; 5968 rstartsj = *startsj_r; 5969 bufa = *bufa_ptr; 5970 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5971 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5972 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5973 5974 /* a-array */ 5975 /* post receives of a-array */ 5976 for (i = 0; i < nrecvs; i++) { 5977 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5978 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5979 } 5980 5981 /* pack the outgoing message a-array */ 5982 if (nsends) k = sstarts[0]; 5983 for (i = 0; i < nsends; i++) { 5984 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5985 bufA = bufa + sstartsj[i]; 5986 for (j = 0; j < nrows; j++) { 5987 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5988 for (ll = 0; ll < sbs; ll++) { 5989 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5990 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5991 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5992 } 5993 } 5994 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5995 } 5996 /* recvs and sends of a-array are completed */ 5997 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5998 PetscCall(PetscFree(reqs)); 5999 6000 if (scall == MAT_INITIAL_MATRIX) { 6001 /* put together the new matrix */ 6002 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6003 6004 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6005 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6006 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6007 b_oth->free_a = PETSC_TRUE; 6008 b_oth->free_ij = PETSC_TRUE; 6009 b_oth->nonew = 0; 6010 6011 PetscCall(PetscFree(bufj)); 6012 if (!startsj_s || !bufa_ptr) { 6013 PetscCall(PetscFree2(sstartsj, rstartsj)); 6014 PetscCall(PetscFree(bufa_ptr)); 6015 } else { 6016 *startsj_s = sstartsj; 6017 *startsj_r = rstartsj; 6018 *bufa_ptr = bufa; 6019 } 6020 } else if (scall == MAT_REUSE_MATRIX) { 6021 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6022 } 6023 6024 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6025 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6026 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6027 PetscFunctionReturn(PETSC_SUCCESS); 6028 } 6029 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6033 #if defined(PETSC_HAVE_MKL_SPARSE) 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6035 #endif 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6038 #if defined(PETSC_HAVE_ELEMENTAL) 6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6040 #endif 6041 #if defined(PETSC_HAVE_SCALAPACK) 6042 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6043 #endif 6044 #if defined(PETSC_HAVE_HYPRE) 6045 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6046 #endif 6047 #if defined(PETSC_HAVE_CUDA) 6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  [       ]       [     ]         [     ]
m [   A   ]  *  n [  B  ]   =   m [  C  ]
  [       ]       [     ]         [     ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form both transposes, multiply them (AIJ*Dense is supported), then transpose the result back into C */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Declare C as the destination of Ct's transpose so MAT_REUSE_MATRIX below is legal */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: set C's sizes/type and install the numeric kernel above */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's dense (possibly device) type if already set; otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-API glue for C = A*B with A MPIDense and B MPIAIJ */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch: only the AB product type is supported for MPIDense * MPIAIJ */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

  j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
  j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

  mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

  For Set1, j1[] contains column indices of the nonzeros.
  For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
  respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
  but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

  Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

  i[],j[]: the CSR of the merged matrix, which has m rows.
  imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
  imap2[]: similar to imap1[], but for Set2.
  Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-way merge on sorted column indices; advancing b1/b2 by the jmap[] gap skips all repeats of a unique nonzero */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
  mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
  n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
    respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

  i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
  i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
  j[],perm[]: the routine needs to sort j[] within each row along with perm[].
  rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
    They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
    and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

  Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
    Atot: number of entries belonging to the diagonal block.
    Annz: number of unique nonzeros belonging to the diagonal block.
    Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
      repeats (i.e., same 'i,j' pair).
    Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
      is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

  Atot: number of entries belonging to the diagonal block
  Annz: number of unique nonzeros belonging to the diagonal block.

  Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

  Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): the upper bound below admits j[p] == mat->cmap->N; confirm whether it should be strictly < N */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters are reused as running offsets in this second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
  nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
  nnz: number of unique nonzeros in the merged matrix
  imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
  jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
  jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Positions at or before the first mapped nonzero get jmap[0], i.e., zero repeats contributed by this set */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the MatCOOStruct_MPIAIJ container attached by MatSetPreallocationCOO_MPIAIJ() */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Discard any previously built assembly data; the COO path rebuilds it from scratch */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(MatGetSize(mat, &M, &N));

  /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
  /* entries come first, then local rows, then remote rows. */
  PetscCount n1 = coo_n, *perm1;
  PetscInt  *i1 = coo_i, *j1 = coo_j; /* NOTE: the caller-provided coo_i/coo_j are shifted and sorted in place */

  PetscCall(PetscMalloc1(n1, &perm1));
  for (k = 0; k < n1; k++) perm1[k] = k;

  /* Manipulate indices so that entries with negative row or col indices will have smallest
     row indices, local entries will have greater but negative row indices, and remote entries
     will have positive row indices.
  */
  for (k = 0; k < n1; k++) {
    if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
    else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
    else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
      if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
    }
  }

  /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
  PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));

  /* Advance k to the first entry we need to take care of */
  for (k = 0; k < n1; k++)
    if (i1[k] > PETSC_MIN_INT) break;
  PetscInt i1start = k;

  PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
  for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/

  /* Send remote rows
to their owner */ 6472 /* Find which rows should be sent to which remote ranks*/ 6473 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6474 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6475 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6476 const PetscInt *ranges; 6477 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6478 6479 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6480 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6481 for (k = rem; k < n1;) { 6482 PetscMPIInt owner; 6483 PetscInt firstRow, lastRow; 6484 6485 /* Locate a row range */ 6486 firstRow = i1[k]; /* first row of this owner */ 6487 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6488 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6489 6490 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6491 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6492 6493 /* All entries in [k,p) belong to this remote owner */ 6494 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6495 PetscMPIInt *sendto2; 6496 PetscInt *nentries2; 6497 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6498 6499 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6500 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6501 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6502 PetscCall(PetscFree2(sendto, nentries2)); 6503 sendto = sendto2; 6504 nentries = nentries2; 6505 maxNsend = maxNsend2; 6506 } 6507 sendto[nsend] = owner; 6508 nentries[nsend] = p - k; 6509 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6510 nsend++; 6511 k = p; 6512 } 6513 6514 /* Build 1st SF to know offsets on remote to send data */ 6515 PetscSF sf1; 6516 PetscInt nroots = 1, nroots2 = 0; 6517 PetscInt nleaves = nsend, nleaves2 = 0; 6518 PetscInt *offsets; 6519 PetscSFNode *iremote; 6520 6521 PetscCall(PetscSFCreate(comm, &sf1)); 6522 PetscCall(PetscMalloc1(nsend, &iremote)); 6523 PetscCall(PetscMalloc1(nsend, &offsets)); 6524 for (k = 0; k < nsend; k++) { 6525 iremote[k].rank = sendto[k]; 6526 iremote[k].index = 0; 6527 nleaves2 += nentries[k]; 6528 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6529 } 6530 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6531 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6532 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6533 PetscCall(PetscSFDestroy(&sf1)); 6534 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6535 6536 /* Build 2nd SF to send remote COOs to their owner */ 6537 PetscSF sf2; 6538 nroots = nroots2; 6539 nleaves = nleaves2; 6540 PetscCall(PetscSFCreate(comm, &sf2)); 6541 
PetscCall(PetscSFSetFromOptions(sf2)); 6542 PetscCall(PetscMalloc1(nleaves, &iremote)); 6543 p = 0; 6544 for (k = 0; k < nsend; k++) { 6545 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6546 for (q = 0; q < nentries[k]; q++, p++) { 6547 iremote[p].rank = sendto[k]; 6548 iremote[p].index = offsets[k] + q; 6549 } 6550 } 6551 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6552 6553 /* Send the remote COOs to their owner */ 6554 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6555 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6556 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6557 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6558 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6559 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6561 6562 PetscCall(PetscFree(offsets)); 6563 PetscCall(PetscFree2(sendto, nentries)); 6564 6565 /* Sort received COOs by row along with the permutation array */ 6566 for (k = 0; k < n2; k++) perm2[k] = k; 6567 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6568 6569 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6570 PetscCount *Cperm1; 6571 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6572 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6573 6574 /* Support for HYPRE matrices, kind of a hack. 
6575 Swap min column with diagonal so that diagonal values will go first */ 6576 PetscBool hypre; 6577 const char *name; 6578 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6579 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6580 if (hypre) { 6581 PetscInt *minj; 6582 PetscBT hasdiag; 6583 6584 PetscCall(PetscBTCreate(m, &hasdiag)); 6585 PetscCall(PetscMalloc1(m, &minj)); 6586 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6587 for (k = i1start; k < rem; k++) { 6588 if (j1[k] < cstart || j1[k] >= cend) continue; 6589 const PetscInt rindex = i1[k] - rstart; 6590 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6591 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6592 } 6593 for (k = 0; k < n2; k++) { 6594 if (j2[k] < cstart || j2[k] >= cend) continue; 6595 const PetscInt rindex = i2[k] - rstart; 6596 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6597 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6598 } 6599 for (k = i1start; k < rem; k++) { 6600 const PetscInt rindex = i1[k] - rstart; 6601 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6602 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6603 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6604 } 6605 for (k = 0; k < n2; k++) { 6606 const PetscInt rindex = i2[k] - rstart; 6607 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6608 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6609 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6610 } 6611 PetscCall(PetscBTDestroy(&hasdiag)); 6612 PetscCall(PetscFree(minj)); 6613 } 6614 6615 /* Split local COOs and received COOs into diag/offdiag portions */ 6616 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6617 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6618 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6619 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6620 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6621 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6622 6623 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6624 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6625 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6626 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6627 6628 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6629 PetscInt *Ai, *Bi; 6630 PetscInt *Aj, *Bj; 6631 6632 PetscCall(PetscMalloc1(m + 1, &Ai)); 6633 PetscCall(PetscMalloc1(m + 1, &Bi)); 6634 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6635 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6636 6637 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6638 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6639 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6640 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6641 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6642 6643 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6644 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6645 6646 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6647 /* expect nonzeros in A/B most likely have local contributing entries */ 6648 PetscInt Annz = Ai[m]; 6649 PetscInt Bnnz = Bi[m]; 6650 PetscCount *Ajmap1_new, *Bjmap1_new; 6651 6652 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6653 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6654 6655 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6656 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6657 
6658 PetscCall(PetscFree(Aimap1)); 6659 PetscCall(PetscFree(Ajmap1)); 6660 PetscCall(PetscFree(Bimap1)); 6661 PetscCall(PetscFree(Bjmap1)); 6662 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6663 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6664 PetscCall(PetscFree(perm1)); 6665 PetscCall(PetscFree3(i2, j2, perm2)); 6666 6667 Ajmap1 = Ajmap1_new; 6668 Bjmap1 = Bjmap1_new; 6669 6670 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6671 if (Annz < Annz1 + Annz2) { 6672 PetscInt *Aj_new; 6673 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6674 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6675 PetscCall(PetscFree(Aj)); 6676 Aj = Aj_new; 6677 } 6678 6679 if (Bnnz < Bnnz1 + Bnnz2) { 6680 PetscInt *Bj_new; 6681 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6682 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6683 PetscCall(PetscFree(Bj)); 6684 Bj = Bj_new; 6685 } 6686 6687 /* Create new submatrices for on-process and off-process coupling */ 6688 PetscScalar *Aa, *Ba; 6689 MatType rtype; 6690 Mat_SeqAIJ *a, *b; 6691 PetscObjectState state; 6692 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6693 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6694 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6695 if (cstart) { 6696 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6697 } 6698 PetscCall(MatDestroy(&mpiaij->A)); 6699 PetscCall(MatDestroy(&mpiaij->B)); 6700 PetscCall(MatGetRootType_Private(mat, &rtype)); 6701 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6702 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6703 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6704 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6705 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6706 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, 
MPI_SUM, PetscObjectComm((PetscObject)mat))); 6707 6708 a = (Mat_SeqAIJ *)mpiaij->A->data; 6709 b = (Mat_SeqAIJ *)mpiaij->B->data; 6710 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6711 a->free_a = b->free_a = PETSC_TRUE; 6712 a->free_ij = b->free_ij = PETSC_TRUE; 6713 6714 /* conversion must happen AFTER multiply setup */ 6715 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6716 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6717 PetscCall(VecDestroy(&mpiaij->lvec)); 6718 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6719 6720 // Put the COO struct in a container and then attach that to the matrix 6721 PetscCall(PetscMalloc1(1, &coo)); 6722 coo->n = coo_n; 6723 coo->sf = sf2; 6724 coo->sendlen = nleaves; 6725 coo->recvlen = nroots; 6726 coo->Annz = Annz; 6727 coo->Bnnz = Bnnz; 6728 coo->Annz2 = Annz2; 6729 coo->Bnnz2 = Bnnz2; 6730 coo->Atot1 = Atot1; 6731 coo->Atot2 = Atot2; 6732 coo->Btot1 = Btot1; 6733 coo->Btot2 = Btot2; 6734 coo->Ajmap1 = Ajmap1; 6735 coo->Aperm1 = Aperm1; 6736 coo->Bjmap1 = Bjmap1; 6737 coo->Bperm1 = Bperm1; 6738 coo->Aimap2 = Aimap2; 6739 coo->Ajmap2 = Ajmap2; 6740 coo->Aperm2 = Aperm2; 6741 coo->Bimap2 = Bimap2; 6742 coo->Bjmap2 = Bjmap2; 6743 coo->Bperm2 = Bperm2; 6744 coo->Cperm1 = Cperm1; 6745 // Allocate in preallocation. 
  // If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert the COO values v[] (ordered as the (i,j) arrays originally given to MatSetPreallocationCOO)
   into the matrix, using the MatCOOStruct_MPIAIJ plan composed on the matrix under
   "__PETSc_MatCOOStruct_Host": entries destined for remote rows are packed (via Cperm1) and sent
   through coo->sf, while local entries are summed into the diag (A) and offdiag (B) blocks through
   the jmap/perm indirection arrays. With INSERT_VALUES existing entries are zeroed first; otherwise
   values are accumulated. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* The COO plan must have been attached by MatSetPreallocationCOO_MPIAIJ */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; always additive since local pass initialized every nonzero */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
 -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ backing struct, installs the ops
   table, creates the stash used for off-process MatSetValues(), and registers the
   MatConvert/MatProduct/COO plugin functions for this type via PetscObjectComposeFunction. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific implementations queried elsewhere by name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
.
 n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices, which must be local, i.e., based off the start column of the diagonal portion
. a - matrix values
. oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
. oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
- oa - matrix values

  Output Parameter:
. mat - the matrix

  Level: advanced

  Notes:
  The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
  must free the arrays once the matrix has been destroyed and not before.

  The `i` and `j` indices are 0 based

  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

  This sets local rows and cannot be used to set off-processor values.

  Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
  legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
  not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
  keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic sanity checks on the user-supplied CSR arrays */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays directly (no copy) as the diag (A) and offdiag (B) sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local rows are set here, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product data attached to C for the backend (GPU-friendly) MatProduct implementation */
typedef struct {
  Mat      *mp; /* intermediate products */
  PetscBool
*mptmp; /* is the intermediate product temporary ? */ 6998 PetscInt cp; /* number of intermediate products */ 6999 7000 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7001 PetscInt *startsj_s, *startsj_r; 7002 PetscScalar *bufa; 7003 Mat P_oth; 7004 7005 /* may take advantage of merging product->B */ 7006 Mat Bloc; /* B-local by merging diag and off-diag */ 7007 7008 /* cusparse does not have support to split between symbolic and numeric phases. 7009 When api_user is true, we don't need to update the numerical values 7010 of the temporary storage */ 7011 PetscBool reusesym; 7012 7013 /* support for COO values insertion */ 7014 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7015 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7016 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7017 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND product data: releases the communication buffers,
   temporary matrices, intermediate products mp[], the own/off COO index arrays (own[0]/off[0]
   hold the single backing allocation the per-product pointers point into), and the struct itself. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with mtype, so free them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g., device-side) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* Gather the selected entries */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* No index list: copy the first n values contiguously */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: refresh the temporary matrices (unless the symbolic
   phase just did), run the numeric op of each intermediate product, gather their values into the
   COO buffers (coo_v on-process, coo_w off-process), optionally scatter off-process contributions,
   and insert everything into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* data from the symbolic phase is only valid for the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track write offsets into coo_v (on-process) and coo_w (off-process) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later ones; their values are not inserted */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* Everything is owned locally: copy the full value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
                                                                                          */
  /* type-0: consecutive, start from 0; type-1: consecutive with    */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* when A is symmetric, A^t*B can be computed as the cheaper A*B */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* select operands and local/global sizes of C for the requested product type */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor run: every insertion is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  cp = 0;
  /* build the list of intermediate sequential products mp[0..cp-1]; for each one record
     how its local rows/cols map to global rows/cols of C (rmapt/cmapt, rmapa/cmapa) */
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is temporary; consumed by the next product */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Choose the backend symbolic product for MPIAIJ device matrices, with runtime
   options to force the CPU fallback; falls back to the plain MPIAIJ ops when
   the operand types do not match or the CPU path was requested. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n  - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      /* emit a new block index only when the block changes; relies on idx[] being sorted ascending */
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1; /* 0 when the row is empty */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed  - the block indices (must be large enough to contain the indices)

  w0/w1/w2 are caller-provided work arrays; on return *collapsed aliases one of them.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap buffers so the union accumulated so far is always in cprev */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

  Input Parameters:
+ Amat       - matrix
. symmetrize - make the result symmetric
- scale      - scale with diagonal

  (filter, index_size and index[] are further inputs; see the definition below)

  Output Parameter:
.
a_Gmat - output scalar graph >= 0 7750 7751 */ 7752 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7753 { 7754 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7755 MPI_Comm comm; 7756 Mat Gmat; 7757 PetscBool ismpiaij, isseqaij; 7758 Mat a, b, c; 7759 MatType jtype; 7760 7761 PetscFunctionBegin; 7762 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7763 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7764 PetscCall(MatGetSize(Amat, &MM, &NN)); 7765 PetscCall(MatGetBlockSize(Amat, &bs)); 7766 nloc = (Iend - Istart) / bs; 7767 7768 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7769 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7770 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7771 7772 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7773 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7774 implementation */ 7775 if (bs > 1) { 7776 PetscCall(MatGetType(Amat, &jtype)); 7777 PetscCall(MatCreate(comm, &Gmat)); 7778 PetscCall(MatSetType(Gmat, jtype)); 7779 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7780 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7781 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7782 PetscInt *d_nnz, *o_nnz; 7783 MatScalar *aa, val, *AA; 7784 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7785 if (isseqaij) { 7786 a = Amat; 7787 b = NULL; 7788 } else { 7789 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7790 a = d->A; 7791 b = d->B; 7792 } 7793 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7794 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7795 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7796 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7797 const PetscInt *cols1, *cols2; 7798 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7799 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7800 nnz[brow / bs] = nc2 / bs; 7801 if (nc2 % bs) ok = 0; 7802 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7803 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7804 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7805 if (nc1 != nc2) ok = 0; 7806 else { 7807 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7808 if (cols1[jj] != cols2[jj]) ok = 0; 7809 if (cols1[jj] % bs != jj % bs) ok = 0; 7810 } 7811 } 7812 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7813 } 7814 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7815 if (!ok) { 7816 PetscCall(PetscFree2(d_nnz, o_nnz)); 7817 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7818 goto old_bs; 7819 } 7820 } 7821 } 7822 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7823 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7824 PetscCall(PetscFree2(d_nnz, o_nnz)); 7825 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7826 // diag 7827 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7828 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7829 ai = aseq->i; 7830 n = ai[brow + 1] - ai[brow]; 7831 aj = aseq->j + ai[brow]; 7832 for (int k = 0; k < n; k += bs) { // block columns 7833 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7834 val = 0; 7835 if (index_size == 0) { 7836 for (int ii = 0; ii < bs; ii++) { // rows in block 7837 aa = aseq->a + ai[brow + ii] + k; 7838 for (int jj = 0; jj < bs; jj++) { // columns in block 7839 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7840 } 7841 } 7842 } else { // use (index,index) value if provided 7843 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7844 int ii = index[iii]; 7845 aa = aseq->a + ai[brow + ii] + k; 7846 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7847 int jj = index[jjj]; 7848 val += PetscAbs(PetscRealPart(aa[jj])); 7849 } 7850 } 7851 } 7852 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7853 AA[k / bs] = val; 7854 } 7855 grow = Istart / bs + brow / bs; 7856 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7857 } 7858 // off-diag 7859 if (ismpiaij) { 7860 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7861 const PetscScalar *vals; 7862 const PetscInt *cols, *garray = aij->garray; 7863 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7864 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7865 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7866 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7867 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7868 AA[k / bs] = 0; 7869 AJ[cidx] = garray[cols[k]] / bs; 7870 } 7871 nc = ncols / bs; 7872 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7873 if (index_size == 0) { 7874 for (int ii = 0; ii < bs; ii++) { // rows in block 7875 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7876 for (int k = 0; k < ncols; k += bs) { 7877 for (int jj = 0; jj < bs; jj++) { // cols in block 7878 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7879 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7880 } 7881 } 7882 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7883 } 7884 } else { // use (index,index) value if provided 7885 for (int iii = 0; iii < index_size; iii++) { // rows in block 7886 int ii = index[iii]; 7887 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7888 for (int k = 0; k < ncols; k += bs) { 7889 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7890 int jj = index[jjj]; 7891 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7892 } 7893 } 7894 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7895 } 7896 } 7897 grow = Istart / bs + brow / bs; 7898 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7899 } 7900 } 7901 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7902 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7903 PetscCall(PetscFree2(AA, AJ)); 7904 } else { 7905 const PetscScalar *vals; 7906 const PetscInt *idx; 7907 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7908 old_bs: 7909 /* 7910 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7911 */ 7912 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7913 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7914 if (isseqaij) { 7915 PetscInt max_d_nnz; 7916 /* 7917 Determine exact preallocation count for (sequential) scalar matrix 7918 */ 7919 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7920 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7921 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7922 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7923 PetscCall(PetscFree3(w0, w1, w2)); 7924 } else if (ismpiaij) { 7925 Mat Daij, Oaij; 7926 const PetscInt *garray; 7927 PetscInt max_d_nnz; 7928 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7929 /* 7930 Determine exact preallocation count for diagonal block portion of scalar matrix 7931 */ 7932 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7933 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7934 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7935 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7936 PetscCall(PetscFree3(w0, w1, w2)); 7937 /* 7938 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7939 */ 7940 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7941 o_nnz[jj] = 0; 7942 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7943 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7944 o_nnz[jj] += ncols; 7945 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7946 } 7947 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7948 } 7949 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7950 /* get scalar copy (norms) of matrix */ 7951 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7952 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7953 PetscCall(PetscFree2(d_nnz, o_nnz)); 7954 for (Ii = Istart; Ii < Iend; Ii++) { 7955 PetscInt dest_row = Ii / bs; 7956 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7957 for (jj = 0; jj < ncols; jj++) { 7958 PetscInt dest_col = idx[jj] / bs; 7959 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7960 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7961 } 7962 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7963 } 7964 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7965 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7966 } 7967 } else { 7968 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7969 else { 7970 Gmat = Amat; 7971 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7972 } 7973 if (isseqaij) { 7974 a = Gmat; 7975 b = NULL; 7976 } else { 7977 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7978 a = d->A; 7979 b = d->B; 7980 } 7981 if (filter >= 0 || scale) { 7982 /* take absolute value of each entry */ 7983 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7984 MatInfo info; 7985 PetscScalar *avals; 7986 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7987 PetscCall(MatSeqAIJGetArray(c, &avals)); 7988 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 7989 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7990 } 7991 } 7992 } 7993 if (symmetrize) { 7994 PetscBool isset, issym; 7995 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7996 if (!isset || !issym) { 7997 Mat matTrans; 7998 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7999 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8000 PetscCall(MatDestroy(&matTrans)); 8001 } 8002 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8003 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8004 if (scale) { 8005 /* scale c for all diagonal values = 1 or -1 */ 8006 Vec diag; 8007 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8008 PetscCall(MatGetDiagonal(Gmat, diag)); 8009 PetscCall(VecReciprocal(diag)); 8010 PetscCall(VecSqrtAbs(diag)); 8011 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8012 PetscCall(VecDestroy(&diag)); 8013 } 8014 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8015 8016 if (filter >= 0) { 8017 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8018 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8019 } 8020 *a_Gmat = Gmat; 8021 PetscFunctionReturn(PETSC_SUCCESS); 8022 } 8023 8024 /* 8025 Special version for direct calls from Fortran 8026 */ 8027 #include <petsc/private/fortranimpl.h> 8028 8029 /* Change these macros so can be used in void function */ 8030 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8031 #undef PetscCall 8032 #define PetscCall(...) \ 8033 do { \ 8034 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8035 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8036 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8037 return; \ 8038 } \ 8039 } while (0) 8040 8041 #undef SETERRQ 8042 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the Fortran-callable symbol to the platform's Fortran name-mangling convention
   (all-caps, no underscore, or trailing underscore as configured) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path of MatSetValues() for MPIAIJ matrices.

  All arguments arrive by reference (Fortran calling convention):
    mmat  - the MPIAIJ matrix
    mm/im - number of rows and their global indices
    mn/in - number of columns and their global indices
    v     - the values, laid out row- or column-major depending on aij->roworiented
    maddv - INSERT_VALUES or ADD_VALUES
    _ierr - output error code; set by the redefined PetscCall/SETERRQ macros above,
            which return from this void function on failure

  Locally owned rows are inserted directly into the diagonal (A) or off-diagonal (B)
  sequential blocks via the MatSetValues_SeqAIJ_{A,B}_Private() macros; rows owned by
  other ranks are queued in the stash (unless aij->donotstash) for communication at
  assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    /* Ownership ranges: rows [rstart,rend) live on this rank; columns [cstart,cend)
       belong to the diagonal block A, everything else goes to B */
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed/updated by MatSetValues_SeqAIJ_{A,B}_Private():
       rp*/ap* point at the current row's column/value arrays, low*/high* bound the
       binary search window, lastcol* caches the previous column for monotone inserts */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: prime the search state for both the A and B blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major or column-major depending on the caller's orientation */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* optionally skip off-diagonal zeros when adding; diagonal entries are kept */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block: insert into A using local column index */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are silently ignored */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block B */
            if (mat->was_assembled) {
              /* after assembly B uses compacted local column indices; translate the
                 global column through the colmap (1-based so 0 means "not present") */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not in B's current pattern and new nonzeros are allowed:
                   disassemble back to global indexing so it can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* before first assembly B stores global column indices */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ