1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 114 115 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 116 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 117 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 118 for communicators controlling multiple processes. It is recommended that you call both of 119 the above preallocation routines for simplicity. 120 121 Options Database Key: 122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 123 124 Developer Note: 125 Level: beginner 126 127 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 128 enough exist. 129 130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 131 M*/ 132 133 /*MC 134 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 135 136 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 137 and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 138 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 139 for communicators controlling multiple processes. It is recommended that you call both of 140 the above preallocation routines for simplicity. 141 142 Options Database Key: 143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 PetscMPIInt in; 288 289 PetscFunctionBegin; 290 PetscCall(MatGetSize(A, &m, &n)); 291 PetscCall(PetscCalloc1(n, &work)); 292 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 294 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 295 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 296 if (type == NORM_2) { 297 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 298 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 299 } else if (type == NORM_1) { 300 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 301 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 302 } else if (type == NORM_INFINITY) { 303 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 304 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 305 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 306 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 307 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 308 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 309 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 310 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 311 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 312 PetscCall(PetscMPIIntCast(n, &in)); 313 if (type == NORM_INFINITY) { 314 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 315 } else { 316 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 317 } 318 PetscCall(PetscFree(work)); 319 if (type == NORM_2) { 320 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 321 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 322 for (i = 0; i < n; i++) reductions[i] /= m; 323 } 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 328 { 329 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 330 IS sis, gis; 331 const PetscInt *isis, *igis; 332 PetscInt n, *iis, nsis, ngis, rstart, i; 333 334 PetscFunctionBegin; 335 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 336 PetscCall(MatFindNonzeroRows(a->B, &gis)); 337 PetscCall(ISGetSize(gis, &ngis)); 338 PetscCall(ISGetSize(sis, &nsis)); 339 PetscCall(ISGetIndices(sis, &isis)); 340 PetscCall(ISGetIndices(gis, &igis)); 341 342 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 343 PetscCall(PetscArraycpy(iis, igis, ngis)); 344 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 345 n = ngis + nsis; 346 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 348 for (i = 0; i < n; i++) iis[i] += rstart; 349 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 350 351 PetscCall(ISRestoreIndices(sis, &isis)); 352 PetscCall(ISRestoreIndices(gis, &igis)); 353 PetscCall(ISDestroy(&sis)); 354 PetscCall(ISDestroy(&gis)); 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 /* 359 Local utility routine that creates a mapping from the global column 360 number to the local number in the off-diagonal part of the local 361 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 362 a slightly higher hash table cost; without it it is not scalable (each processor 363 has an order N integer array but is fast to access. 364 */ 365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 366 { 367 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 368 PetscInt n = aij->B->cmap->n, i; 369 370 PetscFunctionBegin; 371 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 372 #if defined(PETSC_USE_CTABLE) 373 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 374 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 375 #else 376 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 377 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 378 #endif 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 383 do { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col; \ 387 while (high1 - low1 > 5) { \ 388 t = (low1 + high1) / 2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i = low1; _i < high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) { \ 396 ap1[_i] += value; \ 397 /* Not sure LogFlops will slow dow the code or not */ \ 398 (void)PetscLogFlops(1.0); \ 399 } else ap1[_i] = value; \ 400 goto a_noinsert; \ 401 } \ 402 } \ 403 if (value == 0.0 && ignorezeroentries && row != col) { \ 404 low1 = 0; \ 405 high1 = nrow1; \ 406 goto a_noinsert; \ 407 } \ 408 if (nonew == 1) { \ 409 low1 = 0; \ 410 high1 = nrow1; \ 411 goto a_noinsert; \ 412 } \ 413 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 414 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 415 N = nrow1++ - 1; \ 416 a->nz++; \ 417 high1++; \ 418 /* shift up all the later entries in this row */ \ 419 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 420 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 421 rp1[_i] = col; \ 422 ap1[_i] = value; \ 423 a_noinsert:; \ 424 ailen[row] = nrow1; \ 425 } while (0) 426 427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 428 do { \ 429 if (col <= lastcol2) low2 = 0; \ 430 else high2 = nrow2; \ 431 lastcol2 = col; \ 432 while (high2 - low2 > 5) { \ 433 t = (low2 + high2) / 2; \ 434 if (rp2[t] > col) high2 = t; \ 435 else low2 = t; \ 436 } \ 437 for (_i = low2; _i < high2; _i++) { \ 438 if (rp2[_i] > col) break; \ 439 if (rp2[_i] == col) { \ 440 if (addv == ADD_VALUES) { \ 441 ap2[_i] += value; \ 442 (void)PetscLogFlops(1.0); \ 443 } else ap2[_i] = value; \ 444 goto b_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries) { \ 448 low2 = 0; \ 449 high2 = nrow2; \ 450 goto b_noinsert; \ 451 } \ 452 if (nonew == 1) { \ 453 low2 = 0; \ 454 high2 = nrow2; \ 455 goto b_noinsert; \ 456 } \ 457 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 458 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 459 N = nrow2++ - 1; \ 460 b->nz++; \ 461 high2++; \ 462 /* shift up all the later entries in this row */ \ 463 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 464 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 465 rp2[_i] = col; \ 466 ap2[_i] = value; \ 467 b_noinsert:; \ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 555 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 556 if (in[j] >= cstart && in[j] < cend) { 557 col = in[j] - cstart; 558 nonew = a->nonew; 559 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 560 } else if (in[j] < 0) { 561 continue; 562 } else { 563 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 564 if (mat->was_assembled) { 565 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 566 #if defined(PETSC_USE_CTABLE) 567 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 568 col--; 569 #else 570 col = aij->colmap[in[j]] - 1; 571 #endif 572 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 573 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 574 col = in[j]; 575 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 576 B = aij->B; 577 b = (Mat_SeqAIJ *)B->data; 578 bimax = b->imax; 579 bi = b->i; 580 bilen = b->ilen; 581 bj = b->j; 582 ba = b->a; 583 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 584 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 bm = aij->B->rmap->n; 590 ba = b->a; 591 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 592 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 593 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 594 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 595 } 596 } else col = in[j]; 597 nonew = b->nonew; 598 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 599 } 600 } 601 } else { 602 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } else { 608 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 609 } 610 } 611 } 612 } 613 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 614 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 620 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 621 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 622 */ 623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 624 { 625 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 626 Mat A = aij->A; /* diagonal part of the matrix */ 627 Mat B = aij->B; /* off-diagonal part of the matrix */ 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 629 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 630 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 631 PetscInt *ailen = a->ilen, *aj = a->j; 632 PetscInt *bilen = b->ilen, *bj = b->j; 633 PetscInt am = aij->A->rmap->n, j; 634 PetscInt diag_so_far = 0, dnz; 635 PetscInt offd_so_far = 0, onz; 636 637 PetscFunctionBegin; 638 /* Iterate over all rows of the matrix */ 639 for (j = 0; j < am; j++) { 640 dnz = onz = 0; 641 /* Iterate over all non-zero columns of the current row */ 642 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 643 /* If column is in the diagonal */ 644 if (mat_j[col] >= cstart && mat_j[col] < cend) { 645 aj[diag_so_far++] = mat_j[col] - cstart; 646 dnz++; 647 } else { /* off-diagonal entries */ 648 bj[offd_so_far++] = mat_j[col]; 649 onz++; 650 } 651 } 652 ailen[j] = dnz; 653 bilen[j] = onz; 654 } 655 PetscFunctionReturn(PETSC_SUCCESS); 656 } 657 658 /* 659 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 660 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 661 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 662 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 663 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 664 */ 665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 666 { 667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 668 Mat A = aij->A; /* diagonal part of the matrix */ 669 Mat B = aij->B; /* off-diagonal part of the matrix */ 670 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 672 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 673 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 674 PetscInt *ailen = a->ilen, *aj = a->j; 675 PetscInt *bilen = b->ilen, *bj = b->j; 676 PetscInt am = aij->A->rmap->n, j; 677 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. */ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 PetscBool cong; 849 850 PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 const PetscInt idx = rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if pattern was allowed to be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1105 PetscCall(MatGetSize(Amat, &M, &N)); 1106 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1107 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1108 for (i = 0; i < first; i++) notme[i] = i; 1109 for (i = last; i < M; i++) notme[i - last + first] = i; 1110 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1111 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1112 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1113 Aoff = Aoffs[0]; 1114 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1115 Boff = Boffs[0]; 1116 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1117 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1118 PetscCall(MatDestroyMatrices(1, &Boffs)); 1119 PetscCall(ISDestroy(&Me)); 1120 PetscCall(ISDestroy(&Notme)); 1121 PetscCall(PetscFree(notme)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1128 1129 PetscFunctionBegin; 1130 /* do nondiagonal part */ 1131 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1132 /* do local part */ 1133 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1134 /* add partial results together */ 1135 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1136 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /* 1141 This only works correctly for square matrices where the subblock A->A is the 1142 diagonal block 1143 */ 1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1150 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1151 PetscCall(MatGetDiagonal(a->A, v)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatScale(a->A, aa)); 1161 PetscCall(MatScale(a->B, aa)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1170 const PetscInt *garray = aij->garray; 1171 const PetscScalar *aa, *ba; 1172 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1173 PetscInt64 nz, hnz; 1174 PetscInt *rowlens; 1175 PetscInt *colidxs; 1176 PetscScalar *matvals; 1177 PetscMPIInt rank; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 header[2] = N; 1193 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1194 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1195 if (rank == 0) { 1196 if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if (idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 PetscMPIInt iN; 1804 1805 PetscFunctionBegin; 1806 if (aij->size == 1) { 1807 PetscCall(MatNorm(aij->A, type, norm)); 1808 } else { 1809 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1810 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1811 if (type == NORM_FROBENIUS) { 1812 v = amata; 1813 for (i = 0; i < amat->nz; i++) { 1814 sum += PetscRealPart(PetscConj(*v) * (*v)); 1815 v++; 1816 } 1817 v = bmata; 1818 for (i = 0; i < bmat->nz; i++) { 1819 sum += PetscRealPart(PetscConj(*v) * (*v)); 1820 v++; 1821 } 1822 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1823 *norm = PetscSqrtReal(*norm); 1824 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1825 } else if (type == NORM_1) { /* max column norm */ 1826 PetscReal *tmp, *tmp2; 1827 PetscInt *jj, *garray = aij->garray; 1828 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1829 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1830 *norm = 0.0; 1831 v = amata; 1832 jj = amat->j; 1833 for (j = 0; j < amat->nz; j++) { 1834 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1835 v++; 1836 } 1837 v = bmata; 1838 jj = bmat->j; 1839 for (j = 0; j < bmat->nz; j++) { 1840 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1841 v++; 1842 } 1843 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1844 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1845 for (j = 0; j < mat->cmap->N; j++) { 1846 if (tmp2[j] > *norm) *norm = tmp2[j]; 1847 } 1848 PetscCall(PetscFree(tmp)); 1849 PetscCall(PetscFree(tmp2)); 1850 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1851 } else if (type == NORM_INFINITY) { /* max row norm */ 1852 PetscReal ntemp = 0.0; 1853 for (j = 0; j < aij->A->rmap->n; j++) { 1854 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1855 sum = 0.0; 1856 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1857 sum += PetscAbsScalar(*v); 1858 v++; 1859 } 1860 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1861 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1862 sum += PetscAbsScalar(*v); 1863 v++; 1864 } 1865 if (sum > ntemp) ntemp = sum; 1866 } 1867 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1868 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1869 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1870 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1871 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1872 } 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 1876 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1877 { 1878 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1879 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1880 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1881 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1882 Mat B, A_diag, *B_diag; 1883 const MatScalar *pbv, *bv; 1884 1885 PetscFunctionBegin; 1886 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1887 ma = A->rmap->n; 1888 na = A->cmap->n; 1889 mb = a->B->rmap->n; 1890 nb = a->B->cmap->n; 1891 ai = Aloc->i; 1892 aj = Aloc->j; 1893 bi = Bloc->i; 1894 bj = Bloc->j; 1895 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1896 PetscInt *d_nnz, *g_nnz, *o_nnz; 1897 PetscSFNode *oloc; 1898 PETSC_UNUSED PetscSF sf; 1899 1900 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1901 /* compute d_nnz for preallocation */ 1902 PetscCall(PetscArrayzero(d_nnz, na)); 1903 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1904 /* compute local off-diagonal contributions */ 1905 PetscCall(PetscArrayzero(g_nnz, nb)); 1906 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1907 /* map those to global */ 1908 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1909 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1910 PetscCall(PetscSFSetFromOptions(sf)); 1911 PetscCall(PetscArrayzero(o_nnz, na)); 1912 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1913 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1914 PetscCall(PetscSFDestroy(&sf)); 1915 1916 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1917 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1918 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1919 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1920 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1921 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1922 } else { 1923 B = *matout; 1924 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1925 } 1926 1927 b = (Mat_MPIAIJ *)B->data; 1928 A_diag = a->A; 1929 B_diag = &b->A; 1930 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1931 A_diag_ncol = A_diag->cmap->N; 1932 B_diag_ilen = sub_B_diag->ilen; 1933 B_diag_i = sub_B_diag->i; 1934 1935 /* Set ilen for diagonal of B */ 1936 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1937 1938 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1939 very quickly (=without using MatSetValues), because all writes are local. */ 1940 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1941 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1942 1943 /* copy over the B part */ 1944 PetscCall(PetscMalloc1(bi[mb], &cols)); 1945 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1946 pbv = bv; 1947 row = A->rmap->rstart; 1948 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1949 cols_tmp = cols; 1950 for (i = 0; i < mb; i++) { 1951 ncol = bi[i + 1] - bi[i]; 1952 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1953 row++; 1954 if (pbv) pbv += ncol; 1955 if (cols_tmp) cols_tmp += ncol; 1956 } 1957 PetscCall(PetscFree(cols)); 1958 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1959 1960 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1961 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1962 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1963 *matout = B; 1964 } else { 1965 PetscCall(MatHeaderMerge(A, &B)); 1966 } 1967 PetscFunctionReturn(PETSC_SUCCESS); 1968 } 1969 1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1971 { 1972 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1973 Mat a = aij->A, b = aij->B; 1974 PetscInt s1, s2, s3; 1975 1976 PetscFunctionBegin; 1977 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1978 if (rr) { 1979 PetscCall(VecGetLocalSize(rr, &s1)); 1980 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1981 /* Overlap communication with computation. */ 1982 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1983 } 1984 if (ll) { 1985 PetscCall(VecGetLocalSize(ll, &s1)); 1986 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1987 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1988 } 1989 /* scale the diagonal block */ 1990 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1991 1992 if (rr) { 1993 /* Do a scatter end and then right scale the off-diagonal block */ 1994 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1995 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1996 } 1997 PetscFunctionReturn(PETSC_SUCCESS); 1998 } 1999 2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2001 { 2002 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2003 2004 PetscFunctionBegin; 2005 PetscCall(MatSetUnfactored(a->A)); 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2010 { 2011 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2012 Mat a, b, c, d; 2013 PetscBool flg; 2014 2015 PetscFunctionBegin; 2016 a = matA->A; 2017 b = matA->B; 2018 c = matB->A; 2019 d = matB->B; 2020 2021 PetscCall(MatEqual(a, c, &flg)); 2022 if (flg) PetscCall(MatEqual(b, d, &flg)); 2023 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2024 PetscFunctionReturn(PETSC_SUCCESS); 2025 } 2026 2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2030 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2031 2032 PetscFunctionBegin; 2033 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2034 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2035 /* because of the column compression in the off-processor part of the matrix a->B, 2036 the number of columns in a->B and b->B may be different, hence we cannot call 2037 the MatCopy() directly on the two parts. If need be, we can provide a more 2038 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2039 then copying the submatrices */ 2040 PetscCall(MatCopy_Basic(A, B, str)); 2041 } else { 2042 PetscCall(MatCopy(a->A, b->A, str)); 2043 PetscCall(MatCopy(a->B, b->B, str)); 2044 } 2045 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 /* 2050 Computes the number of nonzeros per row needed for preallocation when X and Y 2051 have different nonzero structure. 2052 */ 2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2054 { 2055 PetscInt i, j, k, nzx, nzy; 2056 2057 PetscFunctionBegin; 2058 /* Set the number of nonzeros in the new matrix */ 2059 for (i = 0; i < m; i++) { 2060 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2061 nzx = xi[i + 1] - xi[i]; 2062 nzy = yi[i + 1] - yi[i]; 2063 nnz[i] = 0; 2064 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2065 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2066 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2067 nnz[i]++; 2068 } 2069 for (; k < nzy; k++) nnz[i]++; 2070 } 2071 PetscFunctionReturn(PETSC_SUCCESS); 2072 } 2073 2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2076 { 2077 PetscInt m = Y->rmap->N; 2078 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2079 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2080 2081 PetscFunctionBegin; 2082 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2087 { 2088 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2089 2090 PetscFunctionBegin; 2091 if (str == SAME_NONZERO_PATTERN) { 2092 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2093 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2094 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2095 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2096 } else { 2097 Mat B; 2098 PetscInt *nnz_d, *nnz_o; 2099 2100 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2101 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2102 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2103 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2104 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2105 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2106 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2107 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2108 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2109 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2110 PetscCall(MatHeaderMerge(Y, &B)); 2111 PetscCall(PetscFree(nnz_d)); 2112 PetscCall(PetscFree(nnz_o)); 2113 } 2114 PetscFunctionReturn(PETSC_SUCCESS); 2115 } 2116 2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2118 2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2120 { 2121 PetscFunctionBegin; 2122 if (PetscDefined(USE_COMPLEX)) { 2123 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2124 2125 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2126 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2127 } 2128 PetscFunctionReturn(PETSC_SUCCESS); 2129 } 2130 2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2132 { 2133 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2134 2135 PetscFunctionBegin; 2136 PetscCall(MatRealPart(a->A)); 2137 PetscCall(MatRealPart(a->B)); 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCall(MatImaginaryPart(a->A)); 2147 PetscCall(MatImaginaryPart(a->B)); 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2154 PetscInt i, *idxb = NULL, m = A->rmap->n; 2155 PetscScalar *va, *vv; 2156 Vec vB, vA; 2157 const PetscScalar *vb; 2158 2159 PetscFunctionBegin; 2160 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2161 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2162 2163 PetscCall(VecGetArrayWrite(vA, &va)); 2164 if (idx) { 2165 for (i = 0; i < m; i++) { 2166 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2167 } 2168 } 2169 2170 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2171 PetscCall(PetscMalloc1(m, &idxb)); 2172 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2173 2174 PetscCall(VecGetArrayWrite(v, &vv)); 2175 PetscCall(VecGetArrayRead(vB, &vb)); 2176 for (i = 0; i < m; i++) { 2177 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2178 vv[i] = vb[i]; 2179 if (idx) idx[i] = a->garray[idxb[i]]; 2180 } else { 2181 vv[i] = va[i]; 2182 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2183 } 2184 } 2185 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2186 PetscCall(VecRestoreArrayWrite(vA, &va)); 2187 PetscCall(VecRestoreArrayRead(vB, &vb)); 2188 PetscCall(PetscFree(idxb)); 2189 PetscCall(VecDestroy(&vA)); 2190 PetscCall(VecDestroy(&vB)); 2191 PetscFunctionReturn(PETSC_SUCCESS); 2192 } 2193 2194 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2195 { 2196 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2197 Vec vB, vA; 2198 2199 PetscFunctionBegin; 2200 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2201 PetscCall(MatGetRowSumAbs(a->A, vA)); 2202 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2203 PetscCall(MatGetRowSumAbs(a->B, vB)); 2204 PetscCall(VecAXPY(vA, 1.0, vB)); 2205 PetscCall(VecDestroy(&vB)); 2206 PetscCall(VecCopy(vA, v)); 2207 PetscCall(VecDestroy(&vA)); 2208 PetscFunctionReturn(PETSC_SUCCESS); 2209 } 2210 2211 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2212 { 2213 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2214 PetscInt m = A->rmap->n, n = A->cmap->n; 2215 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2216 PetscInt *cmap = mat->garray; 2217 PetscInt *diagIdx, *offdiagIdx; 2218 Vec diagV, offdiagV; 2219 PetscScalar *a, *diagA, *offdiagA; 2220 const PetscScalar *ba, *bav; 2221 PetscInt r, j, col, ncols, *bi, *bj; 2222 Mat B = mat->B; 2223 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2224 2225 PetscFunctionBegin; 2226 /* When a process holds entire A and other processes have no entry */ 2227 if (A->cmap->N == n) { 2228 PetscCall(VecGetArrayWrite(v, &diagA)); 2229 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2230 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2231 PetscCall(VecDestroy(&diagV)); 2232 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2233 PetscFunctionReturn(PETSC_SUCCESS); 2234 } else if (n == 0) { 2235 if (m) { 2236 PetscCall(VecGetArrayWrite(v, &a)); 2237 for (r = 0; r < m; r++) { 2238 a[r] = 0.0; 2239 if (idx) idx[r] = -1; 2240 } 2241 PetscCall(VecRestoreArrayWrite(v, &a)); 2242 } 2243 PetscFunctionReturn(PETSC_SUCCESS); 2244 } 2245 2246 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r + 1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; 2261 offdiagIdx[r] = cmap[0]; 2262 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2263 offdiagA[r] = 0.0; 2264 2265 /* Find first hole in the cmap */ 2266 for (j = 0; j < ncols; j++) { 2267 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2268 if (col > j && j < cstart) { 2269 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2270 break; 2271 } else if (col > j + n && j >= cstart) { 2272 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2273 break; 2274 } 2275 } 2276 if (j == ncols && ncols < A->cmap->N - n) { 2277 /* a hole is outside compressed Bcols */ 2278 if (ncols == 0) { 2279 if (cstart) { 2280 offdiagIdx[r] = 0; 2281 } else offdiagIdx[r] = cend; 2282 } else { /* ncols > 0 */ 2283 offdiagIdx[r] = cmap[ncols - 1] + 1; 2284 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2285 } 2286 } 2287 } 2288 2289 for (j = 0; j < ncols; j++) { 2290 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2291 offdiagA[r] = *ba; 2292 offdiagIdx[r] = cmap[*bj]; 2293 } 2294 ba++; 2295 bj++; 2296 } 2297 } 2298 2299 PetscCall(VecGetArrayWrite(v, &a)); 2300 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2301 for (r = 0; r < m; ++r) { 2302 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2303 a[r] = diagA[r]; 2304 if (idx) idx[r] = cstart + diagIdx[r]; 2305 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 if (idx) { 2308 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2309 idx[r] = cstart + diagIdx[r]; 2310 } else idx[r] = offdiagIdx[r]; 2311 } 2312 } else { 2313 a[r] = offdiagA[r]; 2314 if (idx) idx[r] = offdiagIdx[r]; 2315 } 2316 } 2317 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2318 PetscCall(VecRestoreArrayWrite(v, &a)); 2319 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2320 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2321 PetscCall(VecDestroy(&diagV)); 2322 PetscCall(VecDestroy(&offdiagV)); 2323 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2324 PetscFunctionReturn(PETSC_SUCCESS); 2325 } 2326 2327 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2328 { 2329 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2330 PetscInt m = A->rmap->n, n = A->cmap->n; 2331 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2332 PetscInt *cmap = mat->garray; 2333 PetscInt *diagIdx, *offdiagIdx; 2334 Vec diagV, offdiagV; 2335 PetscScalar *a, *diagA, *offdiagA; 2336 const PetscScalar *ba, *bav; 2337 PetscInt r, j, col, ncols, *bi, *bj; 2338 Mat B = mat->B; 2339 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2340 2341 PetscFunctionBegin; 2342 /* When a process holds entire A and other processes have no entry */ 2343 if (A->cmap->N == n) { 2344 PetscCall(VecGetArrayWrite(v, &diagA)); 2345 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2346 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2347 PetscCall(VecDestroy(&diagV)); 2348 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2349 PetscFunctionReturn(PETSC_SUCCESS); 2350 } else if (n == 0) { 2351 if (m) { 2352 PetscCall(VecGetArrayWrite(v, &a)); 2353 for (r = 0; r < m; r++) { 2354 a[r] = PETSC_MAX_REAL; 2355 if (idx) idx[r] = -1; 2356 } 2357 PetscCall(VecRestoreArrayWrite(v, &a)); 2358 } 2359 PetscFunctionReturn(PETSC_SUCCESS); 2360 } 2361 2362 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2363 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2364 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2365 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2366 2367 /* Get offdiagIdx[] for implicit 0.0 */ 2368 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2369 ba = bav; 2370 bi = b->i; 2371 bj = b->j; 2372 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2373 for (r = 0; r < m; r++) { 2374 ncols = bi[r + 1] - bi[r]; 2375 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2376 offdiagA[r] = *ba; 2377 offdiagIdx[r] = cmap[0]; 2378 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2379 offdiagA[r] = 0.0; 2380 2381 /* Find first hole in the cmap */ 2382 for (j = 0; j < ncols; j++) { 2383 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2384 if (col > j && j < cstart) { 2385 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2386 break; 2387 } else if (col > j + n && j >= cstart) { 2388 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2389 break; 2390 } 2391 } 2392 if (j == ncols && ncols < A->cmap->N - n) { 2393 /* a hole is outside compressed Bcols */ 2394 if (ncols == 0) { 2395 if (cstart) { 2396 offdiagIdx[r] = 0; 2397 } else offdiagIdx[r] = cend; 2398 } else { /* ncols > 0 */ 2399 offdiagIdx[r] = cmap[ncols - 1] + 1; 2400 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2401 } 2402 } 2403 } 2404 2405 for (j = 0; j < ncols; j++) { 2406 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2407 offdiagA[r] = *ba; 2408 offdiagIdx[r] = cmap[*bj]; 2409 } 2410 ba++; 2411 bj++; 2412 } 2413 } 2414 2415 PetscCall(VecGetArrayWrite(v, &a)); 2416 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2417 for (r = 0; r < m; ++r) { 2418 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2419 a[r] = diagA[r]; 2420 if (idx) idx[r] = cstart + diagIdx[r]; 2421 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2422 a[r] = diagA[r]; 2423 if (idx) { 2424 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2425 idx[r] = cstart + diagIdx[r]; 2426 } else idx[r] = offdiagIdx[r]; 2427 } 2428 } else { 2429 a[r] = offdiagA[r]; 2430 if (idx) idx[r] = offdiagIdx[r]; 2431 } 2432 } 2433 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2434 PetscCall(VecRestoreArrayWrite(v, &a)); 2435 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2436 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2437 PetscCall(VecDestroy(&diagV)); 2438 PetscCall(VecDestroy(&offdiagV)); 2439 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2440 PetscFunctionReturn(PETSC_SUCCESS); 2441 } 2442 2443 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2444 { 2445 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2446 PetscInt m = A->rmap->n, n = A->cmap->n; 2447 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2448 PetscInt *cmap = mat->garray; 2449 PetscInt *diagIdx, *offdiagIdx; 2450 Vec diagV, offdiagV; 2451 PetscScalar *a, *diagA, *offdiagA; 2452 const PetscScalar *ba, *bav; 2453 PetscInt r, j, col, ncols, *bi, *bj; 2454 Mat B = mat->B; 2455 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2456 2457 PetscFunctionBegin; 2458 /* When a process holds entire A and other processes have no entry */ 2459 if (A->cmap->N == n) { 2460 PetscCall(VecGetArrayWrite(v, &diagA)); 2461 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2462 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2463 PetscCall(VecDestroy(&diagV)); 2464 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2465 PetscFunctionReturn(PETSC_SUCCESS); 2466 } else if (n == 0) { 2467 if (m) { 2468 PetscCall(VecGetArrayWrite(v, &a)); 2469 for (r = 0; r < m; r++) { 2470 a[r] = PETSC_MIN_REAL; 2471 if (idx) idx[r] = -1; 2472 } 2473 PetscCall(VecRestoreArrayWrite(v, &a)); 2474 } 2475 PetscFunctionReturn(PETSC_SUCCESS); 2476 } 2477 2478 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2479 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2480 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2481 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2482 2483 /* Get offdiagIdx[] for implicit 0.0 */ 2484 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2485 ba = bav; 2486 bi = b->i; 2487 bj = b->j; 2488 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2489 for (r = 0; r < m; r++) { 2490 ncols = bi[r + 1] - bi[r]; 2491 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2492 offdiagA[r] = *ba; 2493 offdiagIdx[r] = cmap[0]; 2494 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2495 offdiagA[r] = 0.0; 2496 2497 /* Find first hole in the cmap */ 2498 for (j = 0; j < ncols; j++) { 2499 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2500 if (col > j && j < cstart) { 2501 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2502 break; 2503 } else if (col > j + n && j >= cstart) { 2504 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2505 break; 2506 } 2507 } 2508 if (j == ncols && ncols < A->cmap->N - n) { 2509 /* a hole is outside compressed Bcols */ 2510 if (ncols == 0) { 2511 if (cstart) { 2512 offdiagIdx[r] = 0; 2513 } else offdiagIdx[r] = cend; 2514 } else { /* ncols > 0 */ 2515 offdiagIdx[r] = cmap[ncols - 1] + 1; 2516 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2517 } 2518 } 2519 } 2520 2521 for (j = 0; j < ncols; j++) { 2522 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2523 offdiagA[r] = *ba; 2524 offdiagIdx[r] = cmap[*bj]; 2525 } 2526 ba++; 2527 bj++; 2528 } 2529 } 2530 2531 PetscCall(VecGetArrayWrite(v, &a)); 2532 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2533 for (r = 0; r < m; ++r) { 2534 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2535 a[r] = diagA[r]; 2536 if (idx) idx[r] = cstart + diagIdx[r]; 2537 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2538 a[r] = diagA[r]; 2539 if (idx) { 2540 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2541 idx[r] = cstart + diagIdx[r]; 2542 } else idx[r] = offdiagIdx[r]; 2543 } 2544 } else { 2545 a[r] = offdiagA[r]; 2546 if (idx) idx[r] = offdiagIdx[r]; 2547 } 2548 } 2549 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2550 PetscCall(VecRestoreArrayWrite(v, &a)); 2551 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2552 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2553 PetscCall(VecDestroy(&diagV)); 2554 PetscCall(VecDestroy(&offdiagV)); 2555 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2556 PetscFunctionReturn(PETSC_SUCCESS); 2557 } 2558 2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2560 { 2561 Mat *dummy; 2562 2563 PetscFunctionBegin; 2564 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2565 *newmat = *dummy; 2566 PetscCall(PetscFree(dummy)); 2567 PetscFunctionReturn(PETSC_SUCCESS); 2568 } 2569 2570 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2571 { 2572 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2573 2574 PetscFunctionBegin; 2575 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2576 A->factorerrortype = a->A->factorerrortype; 2577 PetscFunctionReturn(PETSC_SUCCESS); 2578 } 2579 2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2581 { 2582 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2583 2584 PetscFunctionBegin; 2585 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2586 PetscCall(MatSetRandom(aij->A, rctx)); 2587 if (x->assembled) { 2588 PetscCall(MatSetRandom(aij->B, rctx)); 2589 } else { 2590 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2591 } 2592 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2593 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2598 { 2599 PetscFunctionBegin; 2600 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2601 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2602 PetscFunctionReturn(PETSC_SUCCESS); 2603 } 2604 2605 /*@ 2606 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2607 2608 Not Collective 2609 2610 Input Parameter: 2611 . A - the matrix 2612 2613 Output Parameter: 2614 . nz - the number of nonzeros 2615 2616 Level: advanced 2617 2618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2619 @*/ 2620 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2621 { 2622 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2623 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2624 PetscBool isaij; 2625 2626 PetscFunctionBegin; 2627 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2628 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2629 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2630 PetscFunctionReturn(PETSC_SUCCESS); 2631 } 2632 2633 /*@ 2634 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2635 2636 Collective 2637 2638 Input Parameters: 2639 + A - the matrix 2640 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2641 2642 Level: advanced 2643 2644 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2645 @*/ 2646 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2647 { 2648 PetscFunctionBegin; 2649 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2654 { 2655 PetscBool sc = PETSC_FALSE, flg; 2656 2657 PetscFunctionBegin; 2658 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2659 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2660 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2661 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2662 PetscOptionsHeadEnd(); 2663 PetscFunctionReturn(PETSC_SUCCESS); 2664 } 2665 2666 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2667 { 2668 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2669 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2670 2671 PetscFunctionBegin; 2672 if (!Y->preallocated) { 2673 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2674 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2675 PetscInt nonew = aij->nonew; 2676 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2677 aij->nonew = nonew; 2678 } 2679 PetscCall(MatShift_Basic(Y, a)); 2680 PetscFunctionReturn(PETSC_SUCCESS); 2681 } 2682 2683 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2684 { 2685 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2686 2687 PetscFunctionBegin; 2688 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2689 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2690 if (d) { 2691 PetscInt rstart; 2692 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2693 *d += rstart; 2694 } 2695 PetscFunctionReturn(PETSC_SUCCESS); 2696 } 2697 2698 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2699 { 2700 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2704 PetscFunctionReturn(PETSC_SUCCESS); 2705 } 2706 2707 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2708 { 2709 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2710 2711 PetscFunctionBegin; 2712 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2713 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2714 PetscFunctionReturn(PETSC_SUCCESS); 2715 } 2716 2717 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2718 MatGetRow_MPIAIJ, 2719 MatRestoreRow_MPIAIJ, 2720 MatMult_MPIAIJ, 2721 /* 4*/ MatMultAdd_MPIAIJ, 2722 MatMultTranspose_MPIAIJ, 2723 MatMultTransposeAdd_MPIAIJ, 2724 NULL, 2725 NULL, 2726 NULL, 2727 /*10*/ NULL, 2728 NULL, 2729 NULL, 2730 MatSOR_MPIAIJ, 2731 MatTranspose_MPIAIJ, 2732 /*15*/ MatGetInfo_MPIAIJ, 2733 MatEqual_MPIAIJ, 2734 MatGetDiagonal_MPIAIJ, 2735 MatDiagonalScale_MPIAIJ, 2736 MatNorm_MPIAIJ, 2737 /*20*/ MatAssemblyBegin_MPIAIJ, 2738 MatAssemblyEnd_MPIAIJ, 2739 MatSetOption_MPIAIJ, 2740 MatZeroEntries_MPIAIJ, 2741 /*24*/ MatZeroRows_MPIAIJ, 2742 NULL, 2743 NULL, 2744 NULL, 2745 NULL, 2746 /*29*/ MatSetUp_MPI_Hash, 2747 NULL, 2748 NULL, 2749 MatGetDiagonalBlock_MPIAIJ, 2750 NULL, 2751 /*34*/ MatDuplicate_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 NULL, 2756 /*39*/ MatAXPY_MPIAIJ, 2757 MatCreateSubMatrices_MPIAIJ, 2758 MatIncreaseOverlap_MPIAIJ, 2759 MatGetValues_MPIAIJ, 2760 MatCopy_MPIAIJ, 2761 /*44*/ MatGetRowMax_MPIAIJ, 2762 MatScale_MPIAIJ, 2763 MatShift_MPIAIJ, 2764 MatDiagonalSet_MPIAIJ, 2765 MatZeroRowsColumns_MPIAIJ, 2766 /*49*/ MatSetRandom_MPIAIJ, 2767 MatGetRowIJ_MPIAIJ, 2768 MatRestoreRowIJ_MPIAIJ, 2769 NULL, 2770 NULL, 2771 /*54*/ MatFDColoringCreate_MPIXAIJ, 2772 NULL, 2773 MatSetUnfactored_MPIAIJ, 2774 MatPermute_MPIAIJ, 2775 NULL, 2776 /*59*/ MatCreateSubMatrix_MPIAIJ, 2777 MatDestroy_MPIAIJ, 2778 MatView_MPIAIJ, 2779 NULL, 2780 NULL, 2781 /*64*/ NULL, 2782 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2783 NULL, 2784 NULL, 2785 NULL, 2786 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2787 MatGetRowMinAbs_MPIAIJ, 2788 NULL, 2789 NULL, 2790 NULL, 2791 NULL, 2792 /*75*/ MatFDColoringApply_AIJ, 2793 MatSetFromOptions_MPIAIJ, 2794 NULL, 2795 NULL, 2796 MatFindZeroDiagonals_MPIAIJ, 2797 /*80*/ NULL, 2798 NULL, 2799 NULL, 2800 /*83*/ MatLoad_MPIAIJ, 2801 NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*89*/ NULL, 2807 NULL, 2808 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2809 NULL, 2810 NULL, 2811 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2812 NULL, 2813 NULL, 2814 NULL, 2815 MatBindToCPU_MPIAIJ, 2816 /*99*/ MatProductSetFromOptions_MPIAIJ, 2817 NULL, 2818 NULL, 2819 MatConjugate_MPIAIJ, 2820 NULL, 2821 /*104*/ MatSetValuesRow_MPIAIJ, 2822 MatRealPart_MPIAIJ, 2823 MatImaginaryPart_MPIAIJ, 2824 NULL, 2825 NULL, 2826 /*109*/ NULL, 2827 NULL, 2828 MatGetRowMin_MPIAIJ, 2829 NULL, 2830 MatMissingDiagonal_MPIAIJ, 2831 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2832 NULL, 2833 MatGetGhosts_MPIAIJ, 2834 NULL, 2835 NULL, 2836 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2837 NULL, 2838 NULL, 2839 NULL, 2840 MatGetMultiProcBlock_MPIAIJ, 2841 /*124*/ MatFindNonzeroRows_MPIAIJ, 2842 MatGetColumnReductions_MPIAIJ, 2843 MatInvertBlockDiagonal_MPIAIJ, 2844 MatInvertVariableBlockDiagonal_MPIAIJ, 2845 MatCreateSubMatricesMPI_MPIAIJ, 2846 /*129*/ NULL, 2847 NULL, 2848 NULL, 2849 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2850 NULL, 2851 /*134*/ NULL, 2852 NULL, 2853 NULL, 2854 NULL, 2855 NULL, 2856 /*139*/ MatSetBlockSizes_MPIAIJ, 2857 NULL, 2858 NULL, 2859 MatFDColoringSetUp_MPIXAIJ, 2860 MatFindOffBlockDiagonalEntries_MPIAIJ, 2861 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2862 /*145*/ NULL, 2863 NULL, 2864 NULL, 2865 MatCreateGraph_Simple_AIJ, 2866 NULL, 2867 /*150*/ NULL, 2868 MatEliminateZeros_MPIAIJ, 2869 MatGetRowSumAbs_MPIAIJ, 2870 NULL}; 2871 2872 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2873 { 2874 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2875 2876 PetscFunctionBegin; 2877 PetscCall(MatStoreValues(aij->A)); 2878 PetscCall(MatStoreValues(aij->B)); 2879 PetscFunctionReturn(PETSC_SUCCESS); 2880 } 2881 2882 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2883 { 2884 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2885 2886 PetscFunctionBegin; 2887 PetscCall(MatRetrieveValues(aij->A)); 2888 PetscCall(MatRetrieveValues(aij->B)); 2889 PetscFunctionReturn(PETSC_SUCCESS); 2890 } 2891 2892 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2893 { 2894 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2895 PetscMPIInt size; 2896 2897 PetscFunctionBegin; 2898 if (B->hash_active) { 2899 B->ops[0] = b->cops; 2900 B->hash_active = PETSC_FALSE; 2901 } 2902 PetscCall(PetscLayoutSetUp(B->rmap)); 2903 PetscCall(PetscLayoutSetUp(B->cmap)); 2904 2905 #if defined(PETSC_USE_CTABLE) 2906 PetscCall(PetscHMapIDestroy(&b->colmap)); 2907 #else 2908 PetscCall(PetscFree(b->colmap)); 2909 #endif 2910 PetscCall(PetscFree(b->garray)); 2911 PetscCall(VecDestroy(&b->lvec)); 2912 PetscCall(VecScatterDestroy(&b->Mvctx)); 2913 2914 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2915 2916 MatSeqXAIJGetOptions_Private(b->B); 2917 PetscCall(MatDestroy(&b->B)); 2918 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2919 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2920 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2921 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2922 MatSeqXAIJRestoreOptions_Private(b->B); 2923 2924 MatSeqXAIJGetOptions_Private(b->A); 2925 PetscCall(MatDestroy(&b->A)); 2926 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2927 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2928 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2929 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2930 MatSeqXAIJRestoreOptions_Private(b->A); 2931 2932 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2933 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2934 B->preallocated = PETSC_TRUE; 2935 B->was_assembled = PETSC_FALSE; 2936 B->assembled = PETSC_FALSE; 2937 PetscFunctionReturn(PETSC_SUCCESS); 2938 } 2939 2940 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2941 { 2942 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2943 2944 PetscFunctionBegin; 2945 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2946 PetscCall(PetscLayoutSetUp(B->rmap)); 2947 PetscCall(PetscLayoutSetUp(B->cmap)); 2948 2949 #if defined(PETSC_USE_CTABLE) 2950 PetscCall(PetscHMapIDestroy(&b->colmap)); 2951 #else 2952 PetscCall(PetscFree(b->colmap)); 2953 #endif 2954 PetscCall(PetscFree(b->garray)); 2955 PetscCall(VecDestroy(&b->lvec)); 2956 PetscCall(VecScatterDestroy(&b->Mvctx)); 2957 2958 PetscCall(MatResetPreallocation(b->A)); 2959 PetscCall(MatResetPreallocation(b->B)); 2960 B->preallocated = PETSC_TRUE; 2961 B->was_assembled = PETSC_FALSE; 2962 B->assembled = PETSC_FALSE; 2963 PetscFunctionReturn(PETSC_SUCCESS); 2964 } 2965 2966 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2967 { 2968 Mat mat; 2969 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2970 2971 PetscFunctionBegin; 2972 *newmat = NULL; 2973 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2974 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2975 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2976 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2977 a = (Mat_MPIAIJ *)mat->data; 2978 2979 mat->factortype = matin->factortype; 2980 mat->assembled = matin->assembled; 2981 mat->insertmode = NOT_SET_VALUES; 2982 2983 a->size = oldmat->size; 2984 a->rank = oldmat->rank; 2985 a->donotstash = oldmat->donotstash; 2986 a->roworiented = oldmat->roworiented; 2987 a->rowindices = NULL; 2988 a->rowvalues = NULL; 2989 a->getrowactive = PETSC_FALSE; 2990 2991 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2992 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2993 if (matin->hash_active) { 2994 PetscCall(MatSetUp(mat)); 2995 } else { 2996 mat->preallocated = matin->preallocated; 2997 if (oldmat->colmap) { 2998 #if defined(PETSC_USE_CTABLE) 2999 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3000 #else 3001 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3002 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3003 #endif 3004 } else a->colmap = NULL; 3005 if (oldmat->garray) { 3006 PetscInt len; 3007 len = oldmat->B->cmap->n; 3008 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3009 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3010 } else a->garray = NULL; 3011 3012 /* It may happen MatDuplicate is called with a non-assembled matrix 3013 In fact, MatDuplicate only requires the matrix to be preallocated 3014 This may happen inside a DMCreateMatrix_Shell */ 3015 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3016 if (oldmat->Mvctx) { 3017 a->Mvctx = oldmat->Mvctx; 3018 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3019 } 3020 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3021 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3022 } 3023 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3024 *newmat = mat; 3025 PetscFunctionReturn(PETSC_SUCCESS); 3026 } 3027 3028 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3029 { 3030 PetscBool isbinary, ishdf5; 3031 3032 PetscFunctionBegin; 3033 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3034 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3035 /* force binary viewer to load .info file if it has not yet done so */ 3036 PetscCall(PetscViewerSetUp(viewer)); 3037 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3039 if (isbinary) { 3040 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3041 } else if (ishdf5) { 3042 #if defined(PETSC_HAVE_HDF5) 3043 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3044 #else 3045 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3046 #endif 3047 } else { 3048 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3049 } 3050 PetscFunctionReturn(PETSC_SUCCESS); 3051 } 3052 3053 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3054 { 3055 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3056 PetscInt *rowidxs, *colidxs; 3057 PetscScalar *matvals; 3058 3059 PetscFunctionBegin; 3060 PetscCall(PetscViewerSetUp(viewer)); 3061 3062 /* read in matrix header */ 3063 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3064 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3065 M = header[1]; 3066 N = header[2]; 3067 nz = header[3]; 3068 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3069 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3070 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3071 3072 /* set block sizes from the viewer's .info file */ 3073 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3074 /* set global sizes if not set already */ 3075 if (mat->rmap->N < 0) mat->rmap->N = M; 3076 if (mat->cmap->N < 0) mat->cmap->N = N; 3077 PetscCall(PetscLayoutSetUp(mat->rmap)); 3078 PetscCall(PetscLayoutSetUp(mat->cmap)); 3079 3080 /* check if the matrix sizes are correct */ 3081 PetscCall(MatGetSize(mat, &rows, &cols)); 3082 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3083 3084 /* read in row lengths and build row indices */ 3085 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3086 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3087 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3088 rowidxs[0] = 0; 3089 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3090 if (nz != PETSC_INT_MAX) { 3091 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3092 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3093 } 3094 3095 /* read in column indices and matrix values */ 3096 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3097 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3099 /* store matrix indices and values */ 3100 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3101 PetscCall(PetscFree(rowidxs)); 3102 PetscCall(PetscFree2(colidxs, matvals)); 3103 PetscFunctionReturn(PETSC_SUCCESS); 3104 } 3105 3106 /* Not scalable because of ISAllGather() unless getting all columns. */ 3107 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3108 { 3109 IS iscol_local; 3110 PetscBool isstride; 3111 PetscMPIInt lisstride = 0, gisstride; 3112 3113 PetscFunctionBegin; 3114 /* check if we are grabbing all columns*/ 3115 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3116 3117 if (isstride) { 3118 PetscInt start, len, mstart, mlen; 3119 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3120 PetscCall(ISGetLocalSize(iscol, &len)); 3121 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3122 if (mstart == start && mlen - mstart == len) lisstride = 1; 3123 } 3124 3125 PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3126 if (gisstride) { 3127 PetscInt N; 3128 PetscCall(MatGetSize(mat, NULL, &N)); 3129 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3130 PetscCall(ISSetIdentity(iscol_local)); 3131 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3132 } else { 3133 PetscInt cbs; 3134 PetscCall(ISGetBlockSize(iscol, &cbs)); 3135 PetscCall(ISAllGather(iscol, &iscol_local)); 3136 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3137 } 3138 3139 *isseq = iscol_local; 3140 PetscFunctionReturn(PETSC_SUCCESS); 3141 } 3142 3143 /* 3144 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3145 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3146 3147 Input Parameters: 3148 + mat - matrix 3149 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3150 i.e., mat->rstart <= isrow[i] < mat->rend 3151 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3152 i.e., mat->cstart <= iscol[i] < mat->cend 3153 3154 Output Parameters: 3155 + isrow_d - sequential row index set for retrieving mat->A 3156 . iscol_d - sequential column index set for retrieving mat->A 3157 . iscol_o - sequential column index set for retrieving mat->B 3158 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3159 */ 3160 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3161 { 3162 Vec x, cmap; 3163 const PetscInt *is_idx; 3164 PetscScalar *xarray, *cmaparray; 3165 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3167 Mat B = a->B; 3168 Vec lvec = a->lvec, lcmap; 3169 PetscInt i, cstart, cend, Bn = B->cmap->N; 3170 MPI_Comm comm; 3171 VecScatter Mvctx = a->Mvctx; 3172 3173 PetscFunctionBegin; 3174 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3175 PetscCall(ISGetLocalSize(iscol, &ncols)); 3176 3177 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3178 PetscCall(MatCreateVecs(mat, &x, NULL)); 3179 PetscCall(VecSet(x, -1.0)); 3180 PetscCall(VecDuplicate(x, &cmap)); 3181 PetscCall(VecSet(cmap, -1.0)); 3182 3183 /* Get start indices */ 3184 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3185 isstart -= ncols; 3186 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3187 3188 PetscCall(ISGetIndices(iscol, &is_idx)); 3189 PetscCall(VecGetArray(x, &xarray)); 3190 PetscCall(VecGetArray(cmap, &cmaparray)); 3191 PetscCall(PetscMalloc1(ncols, &idx)); 3192 for (i = 0; i < ncols; i++) { 3193 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3194 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3195 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3196 } 3197 PetscCall(VecRestoreArray(x, &xarray)); 3198 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3199 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3200 3201 /* Get iscol_d */ 3202 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3203 PetscCall(ISGetBlockSize(iscol, &i)); 3204 PetscCall(ISSetBlockSize(*iscol_d, i)); 3205 3206 /* Get isrow_d */ 3207 PetscCall(ISGetLocalSize(isrow, &m)); 3208 rstart = mat->rmap->rstart; 3209 PetscCall(PetscMalloc1(m, &idx)); 3210 PetscCall(ISGetIndices(isrow, &is_idx)); 3211 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3212 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3213 3214 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3215 PetscCall(ISGetBlockSize(isrow, &i)); 3216 PetscCall(ISSetBlockSize(*isrow_d, i)); 3217 3218 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3219 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3220 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 3222 PetscCall(VecDuplicate(lvec, &lcmap)); 3223 3224 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3225 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 3227 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3228 /* off-process column indices */ 3229 count = 0; 3230 PetscCall(PetscMalloc1(Bn, &idx)); 3231 PetscCall(PetscMalloc1(Bn, &cmap1)); 3232 3233 PetscCall(VecGetArray(lvec, &xarray)); 3234 PetscCall(VecGetArray(lcmap, &cmaparray)); 3235 for (i = 0; i < Bn; i++) { 3236 if (PetscRealPart(xarray[i]) > -1.0) { 3237 idx[count] = i; /* local column index in off-diagonal part B */ 3238 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3239 count++; 3240 } 3241 } 3242 PetscCall(VecRestoreArray(lvec, &xarray)); 3243 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3244 3245 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3246 /* cannot ensure iscol_o has same blocksize as iscol! */ 3247 3248 PetscCall(PetscFree(idx)); 3249 *garray = cmap1; 3250 3251 PetscCall(VecDestroy(&x)); 3252 PetscCall(VecDestroy(&cmap)); 3253 PetscCall(VecDestroy(&lcmap)); 3254 PetscFunctionReturn(PETSC_SUCCESS); 3255 } 3256 3257 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3259 { 3260 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3261 Mat M = NULL; 3262 MPI_Comm comm; 3263 IS iscol_d, isrow_d, iscol_o; 3264 Mat Asub = NULL, Bsub = NULL; 3265 PetscInt n; 3266 3267 PetscFunctionBegin; 3268 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3269 3270 if (call == MAT_REUSE_MATRIX) { 3271 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3272 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3273 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3274 3275 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3276 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3277 3278 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3279 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3280 3281 /* Update diagonal and off-diagonal portions of submat */ 3282 asub = (Mat_MPIAIJ *)(*submat)->data; 3283 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3284 PetscCall(ISGetLocalSize(iscol_o, &n)); 3285 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3286 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3287 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3288 3289 } else { /* call == MAT_INITIAL_MATRIX) */ 3290 const PetscInt *garray; 3291 PetscInt BsubN; 3292 3293 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3294 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3295 3296 /* Create local submatrices Asub and Bsub */ 3297 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3299 3300 /* Create submatrix M */ 3301 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3302 3303 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3304 asub = (Mat_MPIAIJ *)M->data; 3305 3306 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3307 n = asub->B->cmap->N; 3308 if (BsubN > n) { 3309 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3310 const PetscInt *idx; 3311 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3312 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3313 3314 PetscCall(PetscMalloc1(n, &idx_new)); 3315 j = 0; 3316 PetscCall(ISGetIndices(iscol_o, &idx)); 3317 for (i = 0; i < n; i++) { 3318 if (j >= BsubN) break; 3319 while (subgarray[i] > garray[j]) j++; 3320 3321 if (subgarray[i] == garray[j]) { 3322 idx_new[i] = idx[j++]; 3323 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3324 } 3325 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3326 3327 PetscCall(ISDestroy(&iscol_o)); 3328 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3329 3330 } else if (BsubN < n) { 3331 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3332 } 3333 3334 PetscCall(PetscFree(garray)); 3335 *submat = M; 3336 3337 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3338 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3339 PetscCall(ISDestroy(&isrow_d)); 3340 3341 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3342 PetscCall(ISDestroy(&iscol_d)); 3343 3344 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3345 PetscCall(ISDestroy(&iscol_o)); 3346 } 3347 PetscFunctionReturn(PETSC_SUCCESS); 3348 } 3349 3350 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3351 { 3352 IS iscol_local = NULL, isrow_d; 3353 PetscInt csize; 3354 PetscInt n, i, j, start, end; 3355 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3356 MPI_Comm comm; 3357 3358 PetscFunctionBegin; 3359 /* If isrow has same processor distribution as mat, 3360 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3361 if (call == MAT_REUSE_MATRIX) { 3362 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3363 if (isrow_d) { 3364 sameRowDist = PETSC_TRUE; 3365 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3366 } else { 3367 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3368 if (iscol_local) { 3369 sameRowDist = PETSC_TRUE; 3370 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3371 } 3372 } 3373 } else { 3374 /* Check if isrow has same processor distribution as mat */ 3375 sameDist[0] = PETSC_FALSE; 3376 PetscCall(ISGetLocalSize(isrow, &n)); 3377 if (!n) { 3378 sameDist[0] = PETSC_TRUE; 3379 } else { 3380 PetscCall(ISGetMinMax(isrow, &i, &j)); 3381 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3382 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3383 } 3384 3385 /* Check if iscol has same processor distribution as mat */ 3386 sameDist[1] = PETSC_FALSE; 3387 PetscCall(ISGetLocalSize(iscol, &n)); 3388 if (!n) { 3389 sameDist[1] = PETSC_TRUE; 3390 } else { 3391 PetscCall(ISGetMinMax(iscol, &i, &j)); 3392 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3393 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3394 } 3395 3396 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3397 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3398 sameRowDist = tsameDist[0]; 3399 } 3400 3401 if (sameRowDist) { 3402 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3403 /* isrow and iscol have same processor distribution as mat */ 3404 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3405 PetscFunctionReturn(PETSC_SUCCESS); 3406 } else { /* sameRowDist */ 3407 /* isrow has same processor distribution as mat */ 3408 if (call == MAT_INITIAL_MATRIX) { 3409 PetscBool sorted; 3410 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3411 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3412 PetscCall(ISGetSize(iscol, &i)); 3413 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3414 3415 PetscCall(ISSorted(iscol_local, &sorted)); 3416 if (sorted) { 3417 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3418 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3419 PetscFunctionReturn(PETSC_SUCCESS); 3420 } 3421 } else { /* call == MAT_REUSE_MATRIX */ 3422 IS iscol_sub; 3423 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3424 if (iscol_sub) { 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } 3428 } 3429 } 3430 } 3431 3432 /* General case: iscol -> iscol_local which has global size of iscol */ 3433 if (call == MAT_REUSE_MATRIX) { 3434 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3435 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3436 } else { 3437 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3438 } 3439 3440 PetscCall(ISGetLocalSize(iscol, &csize)); 3441 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3442 3443 if (call == MAT_INITIAL_MATRIX) { 3444 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3445 PetscCall(ISDestroy(&iscol_local)); 3446 } 3447 PetscFunctionReturn(PETSC_SUCCESS); 3448 } 3449 3450 /*@C 3451 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3452 and "off-diagonal" part of the matrix in CSR format. 3453 3454 Collective 3455 3456 Input Parameters: 3457 + comm - MPI communicator 3458 . A - "diagonal" portion of matrix 3459 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3460 - garray - global index of `B` columns 3461 3462 Output Parameter: 3463 . mat - the matrix, with input `A` as its local diagonal matrix 3464 3465 Level: advanced 3466 3467 Notes: 3468 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3469 3470 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3471 3472 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3473 @*/ 3474 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3475 { 3476 Mat_MPIAIJ *maij; 3477 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3478 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3479 const PetscScalar *oa; 3480 Mat Bnew; 3481 PetscInt m, n, N; 3482 MatType mpi_mat_type; 3483 3484 PetscFunctionBegin; 3485 PetscCall(MatCreate(comm, mat)); 3486 PetscCall(MatGetSize(A, &m, &n)); 3487 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3488 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3489 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3490 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3491 3492 /* Get global columns of mat */ 3493 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3494 3495 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3496 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3497 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3498 PetscCall(MatSetType(*mat, mpi_mat_type)); 3499 3500 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3501 maij = (Mat_MPIAIJ *)(*mat)->data; 3502 3503 (*mat)->preallocated = PETSC_TRUE; 3504 3505 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3506 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3507 3508 /* Set A as diagonal portion of *mat */ 3509 maij->A = A; 3510 3511 nz = oi[m]; 3512 for (i = 0; i < nz; i++) { 3513 col = oj[i]; 3514 oj[i] = garray[col]; 3515 } 3516 3517 /* Set Bnew as off-diagonal portion of *mat */ 3518 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3519 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3520 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3521 bnew = (Mat_SeqAIJ *)Bnew->data; 3522 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3523 maij->B = Bnew; 3524 3525 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3526 3527 b->free_a = PETSC_FALSE; 3528 b->free_ij = PETSC_FALSE; 3529 PetscCall(MatDestroy(&B)); 3530 3531 bnew->free_a = PETSC_TRUE; 3532 bnew->free_ij = PETSC_TRUE; 3533 3534 /* condense columns of maij->B */ 3535 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3536 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3537 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3538 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3539 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3540 PetscFunctionReturn(PETSC_SUCCESS); 3541 } 3542 3543 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3544 3545 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3546 { 3547 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3548 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3549 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3550 Mat M, Msub, B = a->B; 3551 MatScalar *aa; 3552 Mat_SeqAIJ *aij; 3553 PetscInt *garray = a->garray, *colsub, Ncols; 3554 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3555 IS iscol_sub, iscmap; 3556 const PetscInt *is_idx, *cmap; 3557 PetscBool allcolumns = PETSC_FALSE; 3558 MPI_Comm comm; 3559 3560 PetscFunctionBegin; 3561 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3562 if (call == MAT_REUSE_MATRIX) { 3563 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3564 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3565 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3566 3567 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3568 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3569 3570 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3571 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3572 3573 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3574 3575 } else { /* call == MAT_INITIAL_MATRIX) */ 3576 PetscBool flg; 3577 3578 PetscCall(ISGetLocalSize(iscol, &n)); 3579 PetscCall(ISGetSize(iscol, &Ncols)); 3580 3581 /* (1) iscol -> nonscalable iscol_local */ 3582 /* Check for special case: each processor gets entire matrix columns */ 3583 PetscCall(ISIdentity(iscol_local, &flg)); 3584 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3585 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3586 if (allcolumns) { 3587 iscol_sub = iscol_local; 3588 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3589 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3590 3591 } else { 3592 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3593 PetscInt *idx, *cmap1, k; 3594 PetscCall(PetscMalloc1(Ncols, &idx)); 3595 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3596 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3597 count = 0; 3598 k = 0; 3599 for (i = 0; i < Ncols; i++) { 3600 j = is_idx[i]; 3601 if (j >= cstart && j < cend) { 3602 /* diagonal part of mat */ 3603 idx[count] = j; 3604 cmap1[count++] = i; /* column index in submat */ 3605 } else if (Bn) { 3606 /* off-diagonal part of mat */ 3607 if (j == garray[k]) { 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } else if (j > garray[k]) { 3611 while (j > garray[k] && k < Bn - 1) k++; 3612 if (j == garray[k]) { 3613 idx[count] = j; 3614 cmap1[count++] = i; /* column index in submat */ 3615 } 3616 } 3617 } 3618 } 3619 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3620 3621 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3622 PetscCall(ISGetBlockSize(iscol, &cbs)); 3623 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3624 3625 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3626 } 3627 3628 /* (3) Create sequential Msub */ 3629 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3630 } 3631 3632 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3633 aij = (Mat_SeqAIJ *)(Msub)->data; 3634 ii = aij->i; 3635 PetscCall(ISGetIndices(iscmap, &cmap)); 3636 3637 /* 3638 m - number of local rows 3639 Ncols - number of columns (same on all processors) 3640 rstart - first row in new global matrix generated 3641 */ 3642 PetscCall(MatGetSize(Msub, &m, NULL)); 3643 3644 if (call == MAT_INITIAL_MATRIX) { 3645 /* (4) Create parallel newmat */ 3646 PetscMPIInt rank, size; 3647 PetscInt csize; 3648 3649 PetscCallMPI(MPI_Comm_size(comm, &size)); 3650 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3651 3652 /* 3653 Determine the number of non-zeros in the diagonal and off-diagonal 3654 portions of the matrix in order to do correct preallocation 3655 */ 3656 3657 /* first get start and end of "diagonal" columns */ 3658 PetscCall(ISGetLocalSize(iscol, &csize)); 3659 if (csize == PETSC_DECIDE) { 3660 PetscCall(ISGetSize(isrow, &mglobal)); 3661 if (mglobal == Ncols) { /* square matrix */ 3662 nlocal = m; 3663 } else { 3664 nlocal = Ncols / size + ((Ncols % size) > rank); 3665 } 3666 } else { 3667 nlocal = csize; 3668 } 3669 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3670 rstart = rend - nlocal; 3671 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3672 3673 /* next, compute all the lengths */ 3674 jj = aij->j; 3675 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3676 olens = dlens + m; 3677 for (i = 0; i < m; i++) { 3678 jend = ii[i + 1] - ii[i]; 3679 olen = 0; 3680 dlen = 0; 3681 for (j = 0; j < jend; j++) { 3682 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3683 else dlen++; 3684 jj++; 3685 } 3686 olens[i] = olen; 3687 dlens[i] = dlen; 3688 } 3689 3690 PetscCall(ISGetBlockSize(isrow, &bs)); 3691 PetscCall(ISGetBlockSize(iscol, &cbs)); 3692 3693 PetscCall(MatCreate(comm, &M)); 3694 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3695 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3696 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3697 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3698 PetscCall(PetscFree(dlens)); 3699 3700 } else { /* call == MAT_REUSE_MATRIX */ 3701 M = *newmat; 3702 PetscCall(MatGetLocalSize(M, &i, NULL)); 3703 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3704 PetscCall(MatZeroEntries(M)); 3705 /* 3706 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3707 rather than the slower MatSetValues(). 3708 */ 3709 M->was_assembled = PETSC_TRUE; 3710 M->assembled = PETSC_FALSE; 3711 } 3712 3713 /* (5) Set values of Msub to *newmat */ 3714 PetscCall(PetscMalloc1(count, &colsub)); 3715 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3716 3717 jj = aij->j; 3718 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3719 for (i = 0; i < m; i++) { 3720 row = rstart + i; 3721 nz = ii[i + 1] - ii[i]; 3722 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3723 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3724 jj += nz; 3725 aa += nz; 3726 } 3727 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3728 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3729 3730 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3731 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3732 3733 PetscCall(PetscFree(colsub)); 3734 3735 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3736 if (call == MAT_INITIAL_MATRIX) { 3737 *newmat = M; 3738 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3739 PetscCall(MatDestroy(&Msub)); 3740 3741 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3742 PetscCall(ISDestroy(&iscol_sub)); 3743 3744 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3745 PetscCall(ISDestroy(&iscmap)); 3746 3747 if (iscol_local) { 3748 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3749 PetscCall(ISDestroy(&iscol_local)); 3750 } 3751 } 3752 PetscFunctionReturn(PETSC_SUCCESS); 3753 } 3754 3755 /* 3756 Not great since it makes two copies of the submatrix, first an SeqAIJ 3757 in local and then by concatenating the local matrices the end result. 3758 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3759 3760 This requires a sequential iscol with all indices. 3761 */ 3762 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3763 { 3764 PetscMPIInt rank, size; 3765 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3766 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3767 Mat M, Mreuse; 3768 MatScalar *aa, *vwork; 3769 MPI_Comm comm; 3770 Mat_SeqAIJ *aij; 3771 PetscBool colflag, allcolumns = PETSC_FALSE; 3772 3773 PetscFunctionBegin; 3774 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3775 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3776 PetscCallMPI(MPI_Comm_size(comm, &size)); 3777 3778 /* Check for special case: each processor gets entire matrix columns */ 3779 PetscCall(ISIdentity(iscol, &colflag)); 3780 PetscCall(ISGetLocalSize(iscol, &n)); 3781 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3782 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3783 3784 if (call == MAT_REUSE_MATRIX) { 3785 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3786 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3787 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3788 } else { 3789 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3790 } 3791 3792 /* 3793 m - number of local rows 3794 n - number of columns (same on all processors) 3795 rstart - first row in new global matrix generated 3796 */ 3797 PetscCall(MatGetSize(Mreuse, &m, &n)); 3798 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3799 if (call == MAT_INITIAL_MATRIX) { 3800 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3801 ii = aij->i; 3802 jj = aij->j; 3803 3804 /* 3805 Determine the number of non-zeros in the diagonal and off-diagonal 3806 portions of the matrix in order to do correct preallocation 3807 */ 3808 3809 /* first get start and end of "diagonal" columns */ 3810 if (csize == PETSC_DECIDE) { 3811 PetscCall(ISGetSize(isrow, &mglobal)); 3812 if (mglobal == n) { /* square matrix */ 3813 nlocal = m; 3814 } else { 3815 nlocal = n / size + ((n % size) > rank); 3816 } 3817 } else { 3818 nlocal = csize; 3819 } 3820 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3821 rstart = rend - nlocal; 3822 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3823 3824 /* next, compute all the lengths */ 3825 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3826 olens = dlens + m; 3827 for (i = 0; i < m; i++) { 3828 jend = ii[i + 1] - ii[i]; 3829 olen = 0; 3830 dlen = 0; 3831 for (j = 0; j < jend; j++) { 3832 if (*jj < rstart || *jj >= rend) olen++; 3833 else dlen++; 3834 jj++; 3835 } 3836 olens[i] = olen; 3837 dlens[i] = dlen; 3838 } 3839 PetscCall(MatCreate(comm, &M)); 3840 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3841 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3842 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3843 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3844 PetscCall(PetscFree(dlens)); 3845 } else { 3846 PetscInt ml, nl; 3847 3848 M = *newmat; 3849 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3850 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3851 PetscCall(MatZeroEntries(M)); 3852 /* 3853 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3854 rather than the slower MatSetValues(). 3855 */ 3856 M->was_assembled = PETSC_TRUE; 3857 M->assembled = PETSC_FALSE; 3858 } 3859 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3860 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3861 ii = aij->i; 3862 jj = aij->j; 3863 3864 /* trigger copy to CPU if needed */ 3865 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3866 for (i = 0; i < m; i++) { 3867 row = rstart + i; 3868 nz = ii[i + 1] - ii[i]; 3869 cwork = jj; 3870 jj = PetscSafePointerPlusOffset(jj, nz); 3871 vwork = aa; 3872 aa = PetscSafePointerPlusOffset(aa, nz); 3873 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3874 } 3875 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3876 3877 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3878 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3879 *newmat = M; 3880 3881 /* save submatrix used in processor for next request */ 3882 if (call == MAT_INITIAL_MATRIX) { 3883 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3884 PetscCall(MatDestroy(&Mreuse)); 3885 } 3886 PetscFunctionReturn(PETSC_SUCCESS); 3887 } 3888 3889 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3890 { 3891 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3892 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3893 const PetscInt *JJ; 3894 PetscBool nooffprocentries; 3895 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3896 3897 PetscFunctionBegin; 3898 PetscCall(PetscLayoutSetUp(B->rmap)); 3899 PetscCall(PetscLayoutSetUp(B->cmap)); 3900 m = B->rmap->n; 3901 cstart = B->cmap->rstart; 3902 cend = B->cmap->rend; 3903 rstart = B->rmap->rstart; 3904 irstart = Ii[0]; 3905 3906 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3907 3908 if (PetscDefined(USE_DEBUG)) { 3909 for (i = 0; i < m; i++) { 3910 nnz = Ii[i + 1] - Ii[i]; 3911 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3912 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3913 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3914 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3915 } 3916 } 3917 3918 for (i = 0; i < m; i++) { 3919 nnz = Ii[i + 1] - Ii[i]; 3920 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3921 nnz_max = PetscMax(nnz_max, nnz); 3922 d = 0; 3923 for (j = 0; j < nnz; j++) { 3924 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3925 } 3926 d_nnz[i] = d; 3927 o_nnz[i] = nnz - d; 3928 } 3929 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3930 PetscCall(PetscFree2(d_nnz, o_nnz)); 3931 3932 for (i = 0; i < m; i++) { 3933 ii = i + rstart; 3934 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3935 } 3936 nooffprocentries = B->nooffprocentries; 3937 B->nooffprocentries = PETSC_TRUE; 3938 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3939 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3940 B->nooffprocentries = nooffprocentries; 3941 3942 /* count number of entries below block diagonal */ 3943 PetscCall(PetscFree(Aij->ld)); 3944 PetscCall(PetscCalloc1(m, &ld)); 3945 Aij->ld = ld; 3946 for (i = 0; i < m; i++) { 3947 nnz = Ii[i + 1] - Ii[i]; 3948 j = 0; 3949 while (j < nnz && J[j] < cstart) j++; 3950 ld[i] = j; 3951 if (J) J += nnz; 3952 } 3953 3954 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3955 PetscFunctionReturn(PETSC_SUCCESS); 3956 } 3957 3958 /*@ 3959 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3960 (the default parallel PETSc format). 3961 3962 Collective 3963 3964 Input Parameters: 3965 + B - the matrix 3966 . i - the indices into `j` for the start of each local row (indices start with zero) 3967 . j - the column indices for each local row (indices start with zero) 3968 - v - optional values in the matrix 3969 3970 Level: developer 3971 3972 Notes: 3973 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3974 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3975 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3976 3977 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3978 3979 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3980 3981 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3982 3983 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3984 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3985 3986 The format which is used for the sparse matrix input, is equivalent to a 3987 row-major ordering.. i.e for the following matrix, the input data expected is 3988 as shown 3989 .vb 3990 1 0 0 3991 2 0 3 P0 3992 ------- 3993 4 5 6 P1 3994 3995 Process0 [P0] rows_owned=[0,1] 3996 i = {0,1,3} [size = nrow+1 = 2+1] 3997 j = {0,0,2} [size = 3] 3998 v = {1,2,3} [size = 3] 3999 4000 Process1 [P1] rows_owned=[2] 4001 i = {0,3} [size = nrow+1 = 1+1] 4002 j = {0,1,2} [size = 3] 4003 v = {4,5,6} [size = 3] 4004 .ve 4005 4006 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4007 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4008 @*/ 4009 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4010 { 4011 PetscFunctionBegin; 4012 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4013 PetscFunctionReturn(PETSC_SUCCESS); 4014 } 4015 4016 /*@ 4017 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4018 (the default parallel PETSc format). For good matrix assembly performance 4019 the user should preallocate the matrix storage by setting the parameters 4020 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4021 4022 Collective 4023 4024 Input Parameters: 4025 + B - the matrix 4026 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4027 (same value is used for all local rows) 4028 . d_nnz - array containing the number of nonzeros in the various rows of the 4029 DIAGONAL portion of the local submatrix (possibly different for each row) 4030 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4031 The size of this array is equal to the number of local rows, i.e 'm'. 4032 For matrices that will be factored, you must leave room for (and set) 4033 the diagonal entry even if it is zero. 4034 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4035 submatrix (same value is used for all local rows). 4036 - o_nnz - array containing the number of nonzeros in the various rows of the 4037 OFF-DIAGONAL portion of the local submatrix (possibly different for 4038 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4039 structure. The size of this array is equal to the number 4040 of local rows, i.e 'm'. 4041 4042 Example Usage: 4043 Consider the following 8x8 matrix with 34 non-zero values, that is 4044 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4045 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4046 as follows 4047 4048 .vb 4049 1 2 0 | 0 3 0 | 0 4 4050 Proc0 0 5 6 | 7 0 0 | 8 0 4051 9 0 10 | 11 0 0 | 12 0 4052 ------------------------------------- 4053 13 0 14 | 15 16 17 | 0 0 4054 Proc1 0 18 0 | 19 20 21 | 0 0 4055 0 0 0 | 22 23 0 | 24 0 4056 ------------------------------------- 4057 Proc2 25 26 27 | 0 0 28 | 29 0 4058 30 0 0 | 31 32 33 | 0 34 4059 .ve 4060 4061 This can be represented as a collection of submatrices as 4062 .vb 4063 A B C 4064 D E F 4065 G H I 4066 .ve 4067 4068 Where the submatrices A,B,C are owned by proc0, D,E,F are 4069 owned by proc1, G,H,I are owned by proc2. 4070 4071 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4072 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4073 The 'M','N' parameters are 8,8, and have the same values on all procs. 4074 4075 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4076 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4077 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4078 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4079 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4080 matrix, ans [DF] as another `MATSEQAIJ` matrix. 4081 4082 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4083 allocated for every row of the local diagonal submatrix, and `o_nz` 4084 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4085 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4086 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4087 In this case, the values of `d_nz`, `o_nz` are 4088 .vb 4089 proc0 dnz = 2, o_nz = 2 4090 proc1 dnz = 3, o_nz = 2 4091 proc2 dnz = 1, o_nz = 4 4092 .ve 4093 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4094 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4095 for proc3. i.e we are using 12+15+10=37 storage locations to store 4096 34 values. 4097 4098 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4099 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4100 In the above case the values for `d_nnz`, `o_nnz` are 4101 .vb 4102 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4103 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4104 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4105 .ve 4106 Here the space allocated is sum of all the above values i.e 34, and 4107 hence pre-allocation is perfect. 4108 4109 Level: intermediate 4110 4111 Notes: 4112 If the *_nnz parameter is given then the *_nz parameter is ignored 4113 4114 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4115 storage. The stored row and column indices begin with zero. 4116 See [Sparse Matrices](sec_matsparse) for details. 4117 4118 The parallel matrix is partitioned such that the first m0 rows belong to 4119 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4120 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4121 4122 The DIAGONAL portion of the local submatrix of a processor can be defined 4123 as the submatrix which is obtained by extraction the part corresponding to 4124 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4125 first row that belongs to the processor, r2 is the last row belonging to 4126 the this processor, and c1-c2 is range of indices of the local part of a 4127 vector suitable for applying the matrix to. This is an mxn matrix. In the 4128 common case of a square matrix, the row and column ranges are the same and 4129 the DIAGONAL part is also square. The remaining portion of the local 4130 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4131 4132 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4133 4134 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4135 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4136 You can also run with the option `-info` and look for messages with the string 4137 malloc in them to see if additional memory allocation was needed. 4138 4139 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4140 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4141 @*/ 4142 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4143 { 4144 PetscFunctionBegin; 4145 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4146 PetscValidType(B, 1); 4147 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4148 PetscFunctionReturn(PETSC_SUCCESS); 4149 } 4150 4151 /*@ 4152 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4153 CSR format for the local rows. 4154 4155 Collective 4156 4157 Input Parameters: 4158 + comm - MPI communicator 4159 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4160 . n - This value should be the same as the local size used in creating the 4161 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4162 calculated if `N` is given) For square matrices n is almost always `m`. 4163 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4164 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4165 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4166 . j - global column indices 4167 - a - optional matrix values 4168 4169 Output Parameter: 4170 . mat - the matrix 4171 4172 Level: intermediate 4173 4174 Notes: 4175 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4176 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4177 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4178 4179 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4180 4181 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4182 4183 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4184 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4185 4186 The format which is used for the sparse matrix input, is equivalent to a 4187 row-major ordering, i.e., for the following matrix, the input data expected is 4188 as shown 4189 .vb 4190 1 0 0 4191 2 0 3 P0 4192 ------- 4193 4 5 6 P1 4194 4195 Process0 [P0] rows_owned=[0,1] 4196 i = {0,1,3} [size = nrow+1 = 2+1] 4197 j = {0,0,2} [size = 3] 4198 v = {1,2,3} [size = 3] 4199 4200 Process1 [P1] rows_owned=[2] 4201 i = {0,3} [size = nrow+1 = 1+1] 4202 j = {0,1,2} [size = 3] 4203 v = {4,5,6} [size = 3] 4204 .ve 4205 4206 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4207 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4208 @*/ 4209 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4210 { 4211 PetscFunctionBegin; 4212 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4213 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4214 PetscCall(MatCreate(comm, mat)); 4215 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4216 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4217 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4218 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4219 PetscFunctionReturn(PETSC_SUCCESS); 4220 } 4221 4222 /*@ 4223 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4224 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4225 from `MatCreateMPIAIJWithArrays()` 4226 4227 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4228 4229 Collective 4230 4231 Input Parameters: 4232 + mat - the matrix 4233 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4234 . n - This value should be the same as the local size used in creating the 4235 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4236 calculated if N is given) For square matrices n is almost always m. 4237 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4238 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4239 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4240 . J - column indices 4241 - v - matrix values 4242 4243 Level: deprecated 4244 4245 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4246 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4247 @*/ 4248 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4249 { 4250 PetscInt nnz, i; 4251 PetscBool nooffprocentries; 4252 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4253 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4254 PetscScalar *ad, *ao; 4255 PetscInt ldi, Iii, md; 4256 const PetscInt *Adi = Ad->i; 4257 PetscInt *ld = Aij->ld; 4258 4259 PetscFunctionBegin; 4260 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4261 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4262 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4263 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4264 4265 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4266 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4267 4268 for (i = 0; i < m; i++) { 4269 if (PetscDefined(USE_DEBUG)) { 4270 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4271 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4272 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4273 } 4274 } 4275 nnz = Ii[i + 1] - Ii[i]; 4276 Iii = Ii[i]; 4277 ldi = ld[i]; 4278 md = Adi[i + 1] - Adi[i]; 4279 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4280 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4281 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4282 ad += md; 4283 ao += nnz - md; 4284 } 4285 nooffprocentries = mat->nooffprocentries; 4286 mat->nooffprocentries = PETSC_TRUE; 4287 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4288 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4289 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4290 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4291 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4292 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4293 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4294 mat->nooffprocentries = nooffprocentries; 4295 PetscFunctionReturn(PETSC_SUCCESS); 4296 } 4297 4298 /*@ 4299 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4300 4301 Collective 4302 4303 Input Parameters: 4304 + mat - the matrix 4305 - v - matrix values, stored by row 4306 4307 Level: intermediate 4308 4309 Notes: 4310 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4311 4312 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4313 4314 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4315 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4316 @*/ 4317 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4318 { 4319 PetscInt nnz, i, m; 4320 PetscBool nooffprocentries; 4321 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4322 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4323 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4324 PetscScalar *ad, *ao; 4325 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4326 PetscInt ldi, Iii, md; 4327 PetscInt *ld = Aij->ld; 4328 4329 PetscFunctionBegin; 4330 m = mat->rmap->n; 4331 4332 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4333 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4334 Iii = 0; 4335 for (i = 0; i < m; i++) { 4336 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4337 ldi = ld[i]; 4338 md = Adi[i + 1] - Adi[i]; 4339 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4340 ad += md; 4341 if (ao) { 4342 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4343 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4344 ao += nnz - md; 4345 } 4346 Iii += nnz; 4347 } 4348 nooffprocentries = mat->nooffprocentries; 4349 mat->nooffprocentries = PETSC_TRUE; 4350 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4351 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4352 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4353 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4354 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4355 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4356 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4357 mat->nooffprocentries = nooffprocentries; 4358 PetscFunctionReturn(PETSC_SUCCESS); 4359 } 4360 4361 /*@ 4362 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4363 (the default parallel PETSc format). For good matrix assembly performance 4364 the user should preallocate the matrix storage by setting the parameters 4365 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4366 4367 Collective 4368 4369 Input Parameters: 4370 + comm - MPI communicator 4371 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4372 This value should be the same as the local size used in creating the 4373 y vector for the matrix-vector product y = Ax. 4374 . n - This value should be the same as the local size used in creating the 4375 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4376 calculated if N is given) For square matrices n is almost always m. 4377 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4378 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4379 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4380 (same value is used for all local rows) 4381 . d_nnz - array containing the number of nonzeros in the various rows of the 4382 DIAGONAL portion of the local submatrix (possibly different for each row) 4383 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4384 The size of this array is equal to the number of local rows, i.e 'm'. 4385 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4386 submatrix (same value is used for all local rows). 4387 - o_nnz - array containing the number of nonzeros in the various rows of the 4388 OFF-DIAGONAL portion of the local submatrix (possibly different for 4389 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4390 structure. The size of this array is equal to the number 4391 of local rows, i.e 'm'. 4392 4393 Output Parameter: 4394 . A - the matrix 4395 4396 Options Database Keys: 4397 + -mat_no_inode - Do not use inodes 4398 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4399 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4400 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4401 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4402 4403 Level: intermediate 4404 4405 Notes: 4406 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4407 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4408 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4409 4410 If the *_nnz parameter is given then the *_nz parameter is ignored 4411 4412 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4413 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4414 storage requirements for this matrix. 4415 4416 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4417 processor than it must be used on all processors that share the object for 4418 that argument. 4419 4420 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4421 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4422 4423 The user MUST specify either the local or global matrix dimensions 4424 (possibly both). 4425 4426 The parallel matrix is partitioned across processors such that the 4427 first `m0` rows belong to process 0, the next `m1` rows belong to 4428 process 1, the next `m2` rows belong to process 2, etc., where 4429 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4430 values corresponding to [m x N] submatrix. 4431 4432 The columns are logically partitioned with the n0 columns belonging 4433 to 0th partition, the next n1 columns belonging to the next 4434 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4435 4436 The DIAGONAL portion of the local submatrix on any given processor 4437 is the submatrix corresponding to the rows and columns m,n 4438 corresponding to the given processor. i.e diagonal matrix on 4439 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4440 etc. The remaining portion of the local submatrix [m x (N-n)] 4441 constitute the OFF-DIAGONAL portion. The example below better 4442 illustrates this concept. 4443 4444 For a square global matrix we define each processor's diagonal portion 4445 to be its local rows and the corresponding columns (a square submatrix); 4446 each processor's off-diagonal portion encompasses the remainder of the 4447 local matrix (a rectangular submatrix). 4448 4449 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4450 4451 When calling this routine with a single process communicator, a matrix of 4452 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4453 type of communicator, use the construction mechanism 4454 .vb 4455 MatCreate(..., &A); 4456 MatSetType(A, MATMPIAIJ); 4457 MatSetSizes(A, m, n, M, N); 4458 MatMPIAIJSetPreallocation(A, ...); 4459 .ve 4460 4461 By default, this format uses inodes (identical nodes) when possible. 4462 We search for consecutive rows with the same nonzero structure, thereby 4463 reusing matrix information to achieve increased efficiency. 4464 4465 Example Usage: 4466 Consider the following 8x8 matrix with 34 non-zero values, that is 4467 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4468 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4469 as follows 4470 4471 .vb 4472 1 2 0 | 0 3 0 | 0 4 4473 Proc0 0 5 6 | 7 0 0 | 8 0 4474 9 0 10 | 11 0 0 | 12 0 4475 ------------------------------------- 4476 13 0 14 | 15 16 17 | 0 0 4477 Proc1 0 18 0 | 19 20 21 | 0 0 4478 0 0 0 | 22 23 0 | 24 0 4479 ------------------------------------- 4480 Proc2 25 26 27 | 0 0 28 | 29 0 4481 30 0 0 | 31 32 33 | 0 34 4482 .ve 4483 4484 This can be represented as a collection of submatrices as 4485 4486 .vb 4487 A B C 4488 D E F 4489 G H I 4490 .ve 4491 4492 Where the submatrices A,B,C are owned by proc0, D,E,F are 4493 owned by proc1, G,H,I are owned by proc2. 4494 4495 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4496 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4497 The 'M','N' parameters are 8,8, and have the same values on all procs. 4498 4499 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4500 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4501 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4502 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4503 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4504 matrix, ans [DF] as another SeqAIJ matrix. 4505 4506 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4507 allocated for every row of the local diagonal submatrix, and `o_nz` 4508 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4509 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4510 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4511 In this case, the values of `d_nz`,`o_nz` are 4512 .vb 4513 proc0 dnz = 2, o_nz = 2 4514 proc1 dnz = 3, o_nz = 2 4515 proc2 dnz = 1, o_nz = 4 4516 .ve 4517 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4518 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4519 for proc3. i.e we are using 12+15+10=37 storage locations to store 4520 34 values. 4521 4522 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4523 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4524 In the above case the values for d_nnz,o_nnz are 4525 .vb 4526 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4527 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4528 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4529 .ve 4530 Here the space allocated is sum of all the above values i.e 34, and 4531 hence pre-allocation is perfect. 4532 4533 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4534 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4535 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4536 @*/ 4537 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4538 { 4539 PetscMPIInt size; 4540 4541 PetscFunctionBegin; 4542 PetscCall(MatCreate(comm, A)); 4543 PetscCall(MatSetSizes(*A, m, n, M, N)); 4544 PetscCallMPI(MPI_Comm_size(comm, &size)); 4545 if (size > 1) { 4546 PetscCall(MatSetType(*A, MATMPIAIJ)); 4547 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4548 } else { 4549 PetscCall(MatSetType(*A, MATSEQAIJ)); 4550 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4551 } 4552 PetscFunctionReturn(PETSC_SUCCESS); 4553 } 4554 4555 /*MC 4556 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4557 4558 Synopsis: 4559 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4560 4561 Not Collective 4562 4563 Input Parameter: 4564 . A - the `MATMPIAIJ` matrix 4565 4566 Output Parameters: 4567 + Ad - the diagonal portion of the matrix 4568 . Ao - the off-diagonal portion of the matrix 4569 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4570 - ierr - error code 4571 4572 Level: advanced 4573 4574 Note: 4575 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4576 4577 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4578 M*/ 4579 4580 /*MC 4581 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4582 4583 Synopsis: 4584 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4585 4586 Not Collective 4587 4588 Input Parameters: 4589 + A - the `MATMPIAIJ` matrix 4590 . Ad - the diagonal portion of the matrix 4591 . Ao - the off-diagonal portion of the matrix 4592 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4593 - ierr - error code 4594 4595 Level: advanced 4596 4597 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4598 M*/ 4599 4600 /*@C 4601 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4602 4603 Not Collective 4604 4605 Input Parameter: 4606 . A - The `MATMPIAIJ` matrix 4607 4608 Output Parameters: 4609 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4610 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4611 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4612 4613 Level: intermediate 4614 4615 Note: 4616 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4617 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4618 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4619 local column numbers to global column numbers in the original matrix. 4620 4621 Fortran Notes: 4622 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4623 4624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4625 @*/ 4626 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4627 { 4628 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4629 PetscBool flg; 4630 4631 PetscFunctionBegin; 4632 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4633 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4634 if (Ad) *Ad = a->A; 4635 if (Ao) *Ao = a->B; 4636 if (colmap) *colmap = a->garray; 4637 PetscFunctionReturn(PETSC_SUCCESS); 4638 } 4639 4640 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4641 { 4642 PetscInt m, N, i, rstart, nnz, Ii; 4643 PetscInt *indx; 4644 PetscScalar *values; 4645 MatType rootType; 4646 4647 PetscFunctionBegin; 4648 PetscCall(MatGetSize(inmat, &m, &N)); 4649 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4650 PetscInt *dnz, *onz, sum, bs, cbs; 4651 4652 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4653 /* Check sum(n) = N */ 4654 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4655 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4656 4657 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4658 rstart -= m; 4659 4660 MatPreallocateBegin(comm, m, n, dnz, onz); 4661 for (i = 0; i < m; i++) { 4662 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4663 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4664 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4665 } 4666 4667 PetscCall(MatCreate(comm, outmat)); 4668 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4669 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4670 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4671 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4672 PetscCall(MatSetType(*outmat, rootType)); 4673 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4674 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4675 MatPreallocateEnd(dnz, onz); 4676 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4677 } 4678 4679 /* numeric phase */ 4680 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4681 for (i = 0; i < m; i++) { 4682 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4683 Ii = i + rstart; 4684 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4685 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4686 } 4687 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4688 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4689 PetscFunctionReturn(PETSC_SUCCESS); 4690 } 4691 4692 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4693 { 4694 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4695 4696 PetscFunctionBegin; 4697 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4698 PetscCall(PetscFree(merge->id_r)); 4699 PetscCall(PetscFree(merge->len_s)); 4700 PetscCall(PetscFree(merge->len_r)); 4701 PetscCall(PetscFree(merge->bi)); 4702 PetscCall(PetscFree(merge->bj)); 4703 PetscCall(PetscFree(merge->buf_ri[0])); 4704 PetscCall(PetscFree(merge->buf_ri)); 4705 PetscCall(PetscFree(merge->buf_rj[0])); 4706 PetscCall(PetscFree(merge->buf_rj)); 4707 PetscCall(PetscFree(merge->coi)); 4708 PetscCall(PetscFree(merge->coj)); 4709 PetscCall(PetscFree(merge->owners_co)); 4710 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4711 PetscCall(PetscFree(merge)); 4712 PetscFunctionReturn(PETSC_SUCCESS); 4713 } 4714 4715 #include <../src/mat/utils/freespace.h> 4716 #include <petscbt.h> 4717 4718 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4719 { 4720 MPI_Comm comm; 4721 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4722 PetscMPIInt size, rank, taga, *len_s; 4723 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4724 PetscMPIInt proc, k; 4725 PetscInt **buf_ri, **buf_rj; 4726 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4727 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4728 MPI_Request *s_waits, *r_waits; 4729 MPI_Status *status; 4730 const MatScalar *aa, *a_a; 4731 MatScalar **abuf_r, *ba_i; 4732 Mat_Merge_SeqsToMPI *merge; 4733 PetscContainer container; 4734 4735 PetscFunctionBegin; 4736 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4737 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4738 4739 PetscCallMPI(MPI_Comm_size(comm, &size)); 4740 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4741 4742 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4743 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4744 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4745 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4746 aa = a_a; 4747 4748 bi = merge->bi; 4749 bj = merge->bj; 4750 buf_ri = merge->buf_ri; 4751 buf_rj = merge->buf_rj; 4752 4753 PetscCall(PetscMalloc1(size, &status)); 4754 owners = merge->rowmap->range; 4755 len_s = merge->len_s; 4756 4757 /* send and recv matrix values */ 4758 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4759 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4760 4761 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4762 for (proc = 0, k = 0; proc < size; proc++) { 4763 if (!len_s[proc]) continue; 4764 i = owners[proc]; 4765 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4766 k++; 4767 } 4768 4769 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4770 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4771 PetscCall(PetscFree(status)); 4772 4773 PetscCall(PetscFree(s_waits)); 4774 PetscCall(PetscFree(r_waits)); 4775 4776 /* insert mat values of mpimat */ 4777 PetscCall(PetscMalloc1(N, &ba_i)); 4778 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4779 4780 for (k = 0; k < merge->nrecv; k++) { 4781 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4782 nrows = *buf_ri_k[k]; 4783 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4784 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4785 } 4786 4787 /* set values of ba */ 4788 m = merge->rowmap->n; 4789 for (i = 0; i < m; i++) { 4790 arow = owners[rank] + i; 4791 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4792 bnzi = bi[i + 1] - bi[i]; 4793 PetscCall(PetscArrayzero(ba_i, bnzi)); 4794 4795 /* add local non-zero vals of this proc's seqmat into ba */ 4796 anzi = ai[arow + 1] - ai[arow]; 4797 aj = a->j + ai[arow]; 4798 aa = a_a + ai[arow]; 4799 nextaj = 0; 4800 for (j = 0; nextaj < anzi; j++) { 4801 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4802 ba_i[j] += aa[nextaj++]; 4803 } 4804 } 4805 4806 /* add received vals into ba */ 4807 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4808 /* i-th row */ 4809 if (i == *nextrow[k]) { 4810 anzi = *(nextai[k] + 1) - *nextai[k]; 4811 aj = buf_rj[k] + *nextai[k]; 4812 aa = abuf_r[k] + *nextai[k]; 4813 nextaj = 0; 4814 for (j = 0; nextaj < anzi; j++) { 4815 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4816 ba_i[j] += aa[nextaj++]; 4817 } 4818 } 4819 nextrow[k]++; 4820 nextai[k]++; 4821 } 4822 } 4823 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4824 } 4825 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4826 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4827 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4828 4829 PetscCall(PetscFree(abuf_r[0])); 4830 PetscCall(PetscFree(abuf_r)); 4831 PetscCall(PetscFree(ba_i)); 4832 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4833 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4834 PetscFunctionReturn(PETSC_SUCCESS); 4835 } 4836 4837 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4838 { 4839 Mat B_mpi; 4840 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4841 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4842 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4843 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4844 PetscInt len, *dnz, *onz, bs, cbs; 4845 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4846 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4847 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4848 MPI_Status *status; 4849 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4850 PetscBT lnkbt; 4851 Mat_Merge_SeqsToMPI *merge; 4852 PetscContainer container; 4853 4854 PetscFunctionBegin; 4855 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4856 4857 /* make sure it is a PETSc comm */ 4858 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4859 PetscCallMPI(MPI_Comm_size(comm, &size)); 4860 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4861 4862 PetscCall(PetscNew(&merge)); 4863 PetscCall(PetscMalloc1(size, &status)); 4864 4865 /* determine row ownership */ 4866 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4867 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4868 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4869 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4870 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4871 PetscCall(PetscMalloc1(size, &len_si)); 4872 PetscCall(PetscMalloc1(size, &merge->len_s)); 4873 4874 m = merge->rowmap->n; 4875 owners = merge->rowmap->range; 4876 4877 /* determine the number of messages to send, their lengths */ 4878 len_s = merge->len_s; 4879 4880 len = 0; /* length of buf_si[] */ 4881 merge->nsend = 0; 4882 for (PetscMPIInt proc = 0; proc < size; proc++) { 4883 len_si[proc] = 0; 4884 if (proc == rank) { 4885 len_s[proc] = 0; 4886 } else { 4887 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4888 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4889 } 4890 if (len_s[proc]) { 4891 merge->nsend++; 4892 nrows = 0; 4893 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4894 if (ai[i + 1] > ai[i]) nrows++; 4895 } 4896 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4897 len += len_si[proc]; 4898 } 4899 } 4900 4901 /* determine the number and length of messages to receive for ij-structure */ 4902 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4903 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4904 4905 /* post the Irecv of j-structure */ 4906 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4907 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4908 4909 /* post the Isend of j-structure */ 4910 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4911 4912 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4913 if (!len_s[proc]) continue; 4914 i = owners[proc]; 4915 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4916 k++; 4917 } 4918 4919 /* receives and sends of j-structure are complete */ 4920 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4921 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4922 4923 /* send and recv i-structure */ 4924 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4925 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4926 4927 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4928 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4929 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4930 if (!len_s[proc]) continue; 4931 /* form outgoing message for i-structure: 4932 buf_si[0]: nrows to be sent 4933 [1:nrows]: row index (global) 4934 [nrows+1:2*nrows+1]: i-structure index 4935 */ 4936 nrows = len_si[proc] / 2 - 1; 4937 buf_si_i = buf_si + nrows + 1; 4938 buf_si[0] = nrows; 4939 buf_si_i[0] = 0; 4940 nrows = 0; 4941 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4942 anzi = ai[i + 1] - ai[i]; 4943 if (anzi) { 4944 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4945 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4946 nrows++; 4947 } 4948 } 4949 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4950 k++; 4951 buf_si += len_si[proc]; 4952 } 4953 4954 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4955 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4956 4957 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4958 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4959 4960 PetscCall(PetscFree(len_si)); 4961 PetscCall(PetscFree(len_ri)); 4962 PetscCall(PetscFree(rj_waits)); 4963 PetscCall(PetscFree2(si_waits, sj_waits)); 4964 PetscCall(PetscFree(ri_waits)); 4965 PetscCall(PetscFree(buf_s)); 4966 PetscCall(PetscFree(status)); 4967 4968 /* compute a local seq matrix in each processor */ 4969 /* allocate bi array and free space for accumulating nonzero column info */ 4970 PetscCall(PetscMalloc1(m + 1, &bi)); 4971 bi[0] = 0; 4972 4973 /* create and initialize a linked list */ 4974 nlnk = N + 1; 4975 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4976 4977 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4978 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4979 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4980 4981 current_space = free_space; 4982 4983 /* determine symbolic info for each local row */ 4984 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4985 4986 for (k = 0; k < merge->nrecv; k++) { 4987 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4988 nrows = *buf_ri_k[k]; 4989 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4990 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4991 } 4992 4993 MatPreallocateBegin(comm, m, n, dnz, onz); 4994 len = 0; 4995 for (i = 0; i < m; i++) { 4996 bnzi = 0; 4997 /* add local non-zero cols of this proc's seqmat into lnk */ 4998 arow = owners[rank] + i; 4999 anzi = ai[arow + 1] - ai[arow]; 5000 aj = a->j + ai[arow]; 5001 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5002 bnzi += nlnk; 5003 /* add received col data into lnk */ 5004 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5005 if (i == *nextrow[k]) { /* i-th row */ 5006 anzi = *(nextai[k] + 1) - *nextai[k]; 5007 aj = buf_rj[k] + *nextai[k]; 5008 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5009 bnzi += nlnk; 5010 nextrow[k]++; 5011 nextai[k]++; 5012 } 5013 } 5014 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5015 5016 /* if free space is not available, make more free space */ 5017 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5018 /* copy data into free space, then initialize lnk */ 5019 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5020 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5021 5022 current_space->array += bnzi; 5023 current_space->local_used += bnzi; 5024 current_space->local_remaining -= bnzi; 5025 5026 bi[i + 1] = bi[i] + bnzi; 5027 } 5028 5029 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5030 5031 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5032 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5033 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5034 5035 /* create symbolic parallel matrix B_mpi */ 5036 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5037 PetscCall(MatCreate(comm, &B_mpi)); 5038 if (n == PETSC_DECIDE) { 5039 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5040 } else { 5041 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5042 } 5043 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5044 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5045 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5046 MatPreallocateEnd(dnz, onz); 5047 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5048 5049 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5050 B_mpi->assembled = PETSC_FALSE; 5051 merge->bi = bi; 5052 merge->bj = bj; 5053 merge->buf_ri = buf_ri; 5054 merge->buf_rj = buf_rj; 5055 merge->coi = NULL; 5056 merge->coj = NULL; 5057 merge->owners_co = NULL; 5058 5059 PetscCall(PetscCommDestroy(&comm)); 5060 5061 /* attach the supporting struct to B_mpi for reuse */ 5062 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5063 PetscCall(PetscContainerSetPointer(container, merge)); 5064 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5065 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5066 PetscCall(PetscContainerDestroy(&container)); 5067 *mpimat = B_mpi; 5068 5069 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5070 PetscFunctionReturn(PETSC_SUCCESS); 5071 } 5072 5073 /*@ 5074 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5075 matrices from each processor 5076 5077 Collective 5078 5079 Input Parameters: 5080 + comm - the communicators the parallel matrix will live on 5081 . seqmat - the input sequential matrices 5082 . m - number of local rows (or `PETSC_DECIDE`) 5083 . n - number of local columns (or `PETSC_DECIDE`) 5084 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5085 5086 Output Parameter: 5087 . mpimat - the parallel matrix generated 5088 5089 Level: advanced 5090 5091 Note: 5092 The dimensions of the sequential matrix in each processor MUST be the same. 5093 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5094 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5095 5096 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5097 @*/ 5098 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5099 { 5100 PetscMPIInt size; 5101 5102 PetscFunctionBegin; 5103 PetscCallMPI(MPI_Comm_size(comm, &size)); 5104 if (size == 1) { 5105 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5106 if (scall == MAT_INITIAL_MATRIX) { 5107 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5108 } else { 5109 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5110 } 5111 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5112 PetscFunctionReturn(PETSC_SUCCESS); 5113 } 5114 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5115 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5116 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5117 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5118 PetscFunctionReturn(PETSC_SUCCESS); 5119 } 5120 5121 /*@ 5122 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5123 5124 Not Collective 5125 5126 Input Parameter: 5127 . A - the matrix 5128 5129 Output Parameter: 5130 . A_loc - the local sequential matrix generated 5131 5132 Level: developer 5133 5134 Notes: 5135 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5136 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5137 `n` is the global column count obtained with `MatGetSize()` 5138 5139 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5140 5141 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5142 5143 Destroy the matrix with `MatDestroy()` 5144 5145 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5146 @*/ 5147 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5148 { 5149 PetscBool mpi; 5150 5151 PetscFunctionBegin; 5152 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5153 if (mpi) { 5154 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5155 } else { 5156 *A_loc = A; 5157 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5158 } 5159 PetscFunctionReturn(PETSC_SUCCESS); 5160 } 5161 5162 /*@ 5163 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5164 5165 Not Collective 5166 5167 Input Parameters: 5168 + A - the matrix 5169 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5170 5171 Output Parameter: 5172 . A_loc - the local sequential matrix generated 5173 5174 Level: developer 5175 5176 Notes: 5177 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5178 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5179 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5180 5181 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5182 5183 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5184 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5185 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5186 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5187 5188 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5189 @*/ 5190 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5191 { 5192 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5193 Mat_SeqAIJ *mat, *a, *b; 5194 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5195 const PetscScalar *aa, *ba, *aav, *bav; 5196 PetscScalar *ca, *cam; 5197 PetscMPIInt size; 5198 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5199 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5200 PetscBool match; 5201 5202 PetscFunctionBegin; 5203 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5204 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5205 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5206 if (size == 1) { 5207 if (scall == MAT_INITIAL_MATRIX) { 5208 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5209 *A_loc = mpimat->A; 5210 } else if (scall == MAT_REUSE_MATRIX) { 5211 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5212 } 5213 PetscFunctionReturn(PETSC_SUCCESS); 5214 } 5215 5216 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5217 a = (Mat_SeqAIJ *)mpimat->A->data; 5218 b = (Mat_SeqAIJ *)mpimat->B->data; 5219 ai = a->i; 5220 aj = a->j; 5221 bi = b->i; 5222 bj = b->j; 5223 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5224 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5225 aa = aav; 5226 ba = bav; 5227 if (scall == MAT_INITIAL_MATRIX) { 5228 PetscCall(PetscMalloc1(1 + am, &ci)); 5229 ci[0] = 0; 5230 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5231 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5232 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5233 k = 0; 5234 for (i = 0; i < am; i++) { 5235 ncols_o = bi[i + 1] - bi[i]; 5236 ncols_d = ai[i + 1] - ai[i]; 5237 /* off-diagonal portion of A */ 5238 for (jo = 0; jo < ncols_o; jo++) { 5239 col = cmap[*bj]; 5240 if (col >= cstart) break; 5241 cj[k] = col; 5242 bj++; 5243 ca[k++] = *ba++; 5244 } 5245 /* diagonal portion of A */ 5246 for (j = 0; j < ncols_d; j++) { 5247 cj[k] = cstart + *aj++; 5248 ca[k++] = *aa++; 5249 } 5250 /* off-diagonal portion of A */ 5251 for (j = jo; j < ncols_o; j++) { 5252 cj[k] = cmap[*bj++]; 5253 ca[k++] = *ba++; 5254 } 5255 } 5256 /* put together the new matrix */ 5257 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5258 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5259 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5260 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5261 mat->free_a = PETSC_TRUE; 5262 mat->free_ij = PETSC_TRUE; 5263 mat->nonew = 0; 5264 } else if (scall == MAT_REUSE_MATRIX) { 5265 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5266 ci = mat->i; 5267 cj = mat->j; 5268 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5269 for (i = 0; i < am; i++) { 5270 /* off-diagonal portion of A */ 5271 ncols_o = bi[i + 1] - bi[i]; 5272 for (jo = 0; jo < ncols_o; jo++) { 5273 col = cmap[*bj]; 5274 if (col >= cstart) break; 5275 *cam++ = *ba++; 5276 bj++; 5277 } 5278 /* diagonal portion of A */ 5279 ncols_d = ai[i + 1] - ai[i]; 5280 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5281 /* off-diagonal portion of A */ 5282 for (j = jo; j < ncols_o; j++) { 5283 *cam++ = *ba++; 5284 bj++; 5285 } 5286 } 5287 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5288 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5289 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5290 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5291 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5292 PetscFunctionReturn(PETSC_SUCCESS); 5293 } 5294 5295 /*@ 5296 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5297 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5298 5299 Not Collective 5300 5301 Input Parameters: 5302 + A - the matrix 5303 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5304 5305 Output Parameters: 5306 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5307 - A_loc - the local sequential matrix generated 5308 5309 Level: developer 5310 5311 Note: 5312 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5313 part, then those associated with the off-diagonal part (in its local ordering) 5314 5315 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5316 @*/ 5317 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5318 { 5319 Mat Ao, Ad; 5320 const PetscInt *cmap; 5321 PetscMPIInt size; 5322 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5323 5324 PetscFunctionBegin; 5325 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5326 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5327 if (size == 1) { 5328 if (scall == MAT_INITIAL_MATRIX) { 5329 PetscCall(PetscObjectReference((PetscObject)Ad)); 5330 *A_loc = Ad; 5331 } else if (scall == MAT_REUSE_MATRIX) { 5332 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5333 } 5334 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5335 PetscFunctionReturn(PETSC_SUCCESS); 5336 } 5337 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5338 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5339 if (f) { 5340 PetscCall((*f)(A, scall, glob, A_loc)); 5341 } else { 5342 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5343 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5344 Mat_SeqAIJ *c; 5345 PetscInt *ai = a->i, *aj = a->j; 5346 PetscInt *bi = b->i, *bj = b->j; 5347 PetscInt *ci, *cj; 5348 const PetscScalar *aa, *ba; 5349 PetscScalar *ca; 5350 PetscInt i, j, am, dn, on; 5351 5352 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5353 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5354 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5355 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5356 if (scall == MAT_INITIAL_MATRIX) { 5357 PetscInt k; 5358 PetscCall(PetscMalloc1(1 + am, &ci)); 5359 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5360 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5361 ci[0] = 0; 5362 for (i = 0, k = 0; i < am; i++) { 5363 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5364 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5365 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5366 /* diagonal portion of A */ 5367 for (j = 0; j < ncols_d; j++, k++) { 5368 cj[k] = *aj++; 5369 ca[k] = *aa++; 5370 } 5371 /* off-diagonal portion of A */ 5372 for (j = 0; j < ncols_o; j++, k++) { 5373 cj[k] = dn + *bj++; 5374 ca[k] = *ba++; 5375 } 5376 } 5377 /* put together the new matrix */ 5378 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5379 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5380 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5381 c = (Mat_SeqAIJ *)(*A_loc)->data; 5382 c->free_a = PETSC_TRUE; 5383 c->free_ij = PETSC_TRUE; 5384 c->nonew = 0; 5385 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5386 } else if (scall == MAT_REUSE_MATRIX) { 5387 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5388 for (i = 0; i < am; i++) { 5389 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5390 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5391 /* diagonal portion of A */ 5392 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5393 /* off-diagonal portion of A */ 5394 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5395 } 5396 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5397 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5398 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5399 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5400 if (glob) { 5401 PetscInt cst, *gidx; 5402 5403 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5404 PetscCall(PetscMalloc1(dn + on, &gidx)); 5405 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5406 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5407 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5408 } 5409 } 5410 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5411 PetscFunctionReturn(PETSC_SUCCESS); 5412 } 5413 5414 /*@C 5415 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5416 5417 Not Collective 5418 5419 Input Parameters: 5420 + A - the matrix 5421 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5422 . row - index set of rows to extract (or `NULL`) 5423 - col - index set of columns to extract (or `NULL`) 5424 5425 Output Parameter: 5426 . A_loc - the local sequential matrix generated 5427 5428 Level: developer 5429 5430 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5431 @*/ 5432 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5433 { 5434 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5435 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5436 IS isrowa, iscola; 5437 Mat *aloc; 5438 PetscBool match; 5439 5440 PetscFunctionBegin; 5441 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5442 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5443 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5444 if (!row) { 5445 start = A->rmap->rstart; 5446 end = A->rmap->rend; 5447 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5448 } else { 5449 isrowa = *row; 5450 } 5451 if (!col) { 5452 start = A->cmap->rstart; 5453 cmap = a->garray; 5454 nzA = a->A->cmap->n; 5455 nzB = a->B->cmap->n; 5456 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5457 ncols = 0; 5458 for (i = 0; i < nzB; i++) { 5459 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5460 else break; 5461 } 5462 imark = i; 5463 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5464 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5465 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5466 } else { 5467 iscola = *col; 5468 } 5469 if (scall != MAT_INITIAL_MATRIX) { 5470 PetscCall(PetscMalloc1(1, &aloc)); 5471 aloc[0] = *A_loc; 5472 } 5473 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5474 if (!col) { /* attach global id of condensed columns */ 5475 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5476 } 5477 *A_loc = aloc[0]; 5478 PetscCall(PetscFree(aloc)); 5479 if (!row) PetscCall(ISDestroy(&isrowa)); 5480 if (!col) PetscCall(ISDestroy(&iscola)); 5481 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5482 PetscFunctionReturn(PETSC_SUCCESS); 5483 } 5484 5485 /* 5486 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5487 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5488 * on a global size. 5489 * */ 5490 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5491 { 5492 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5493 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5494 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5495 PetscMPIInt owner; 5496 PetscSFNode *iremote, *oiremote; 5497 const PetscInt *lrowindices; 5498 PetscSF sf, osf; 5499 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5500 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5501 MPI_Comm comm; 5502 ISLocalToGlobalMapping mapping; 5503 const PetscScalar *pd_a, *po_a; 5504 5505 PetscFunctionBegin; 5506 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5507 /* plocalsize is the number of roots 5508 * nrows is the number of leaves 5509 * */ 5510 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5511 PetscCall(ISGetLocalSize(rows, &nrows)); 5512 PetscCall(PetscCalloc1(nrows, &iremote)); 5513 PetscCall(ISGetIndices(rows, &lrowindices)); 5514 for (i = 0; i < nrows; i++) { 5515 /* Find a remote index and an owner for a row 5516 * The row could be local or remote 5517 * */ 5518 owner = 0; 5519 lidx = 0; 5520 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5521 iremote[i].index = lidx; 5522 iremote[i].rank = owner; 5523 } 5524 /* Create SF to communicate how many nonzero columns for each row */ 5525 PetscCall(PetscSFCreate(comm, &sf)); 5526 /* SF will figure out the number of nonzero columns for each row, and their 5527 * offsets 5528 * */ 5529 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5530 PetscCall(PetscSFSetFromOptions(sf)); 5531 PetscCall(PetscSFSetUp(sf)); 5532 5533 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5534 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5535 PetscCall(PetscCalloc1(nrows, &pnnz)); 5536 roffsets[0] = 0; 5537 roffsets[1] = 0; 5538 for (i = 0; i < plocalsize; i++) { 5539 /* diagonal */ 5540 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5541 /* off-diagonal */ 5542 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5543 /* compute offsets so that we relative location for each row */ 5544 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5545 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5546 } 5547 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5548 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5549 /* 'r' means root, and 'l' means leaf */ 5550 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5551 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5552 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5553 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5554 PetscCall(PetscSFDestroy(&sf)); 5555 PetscCall(PetscFree(roffsets)); 5556 PetscCall(PetscFree(nrcols)); 5557 dntotalcols = 0; 5558 ontotalcols = 0; 5559 ncol = 0; 5560 for (i = 0; i < nrows; i++) { 5561 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5562 ncol = PetscMax(pnnz[i], ncol); 5563 /* diagonal */ 5564 dntotalcols += nlcols[i * 2 + 0]; 5565 /* off-diagonal */ 5566 ontotalcols += nlcols[i * 2 + 1]; 5567 } 5568 /* We do not need to figure the right number of columns 5569 * since all the calculations will be done by going through the raw data 5570 * */ 5571 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5572 PetscCall(MatSetUp(*P_oth)); 5573 PetscCall(PetscFree(pnnz)); 5574 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5575 /* diagonal */ 5576 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5577 /* off-diagonal */ 5578 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5579 /* diagonal */ 5580 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5581 /* off-diagonal */ 5582 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5583 dntotalcols = 0; 5584 ontotalcols = 0; 5585 ntotalcols = 0; 5586 for (i = 0; i < nrows; i++) { 5587 owner = 0; 5588 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5589 /* Set iremote for diag matrix */ 5590 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5591 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5592 iremote[dntotalcols].rank = owner; 5593 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5594 ilocal[dntotalcols++] = ntotalcols++; 5595 } 5596 /* off-diagonal */ 5597 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5598 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5599 oiremote[ontotalcols].rank = owner; 5600 oilocal[ontotalcols++] = ntotalcols++; 5601 } 5602 } 5603 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5604 PetscCall(PetscFree(loffsets)); 5605 PetscCall(PetscFree(nlcols)); 5606 PetscCall(PetscSFCreate(comm, &sf)); 5607 /* P serves as roots and P_oth is leaves 5608 * Diag matrix 5609 * */ 5610 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5611 PetscCall(PetscSFSetFromOptions(sf)); 5612 PetscCall(PetscSFSetUp(sf)); 5613 5614 PetscCall(PetscSFCreate(comm, &osf)); 5615 /* off-diagonal */ 5616 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5617 PetscCall(PetscSFSetFromOptions(osf)); 5618 PetscCall(PetscSFSetUp(osf)); 5619 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5620 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5621 /* operate on the matrix internal data to save memory */ 5622 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5623 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5624 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5625 /* Convert to global indices for diag matrix */ 5626 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5627 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5628 /* We want P_oth store global indices */ 5629 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5630 /* Use memory scalable approach */ 5631 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5632 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5633 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5634 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5635 /* Convert back to local indices */ 5636 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5637 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5638 nout = 0; 5639 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5640 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5641 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5642 /* Exchange values */ 5643 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5644 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5645 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5646 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5647 /* Stop PETSc from shrinking memory */ 5648 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5649 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5650 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5651 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5652 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5653 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5654 PetscCall(PetscSFDestroy(&sf)); 5655 PetscCall(PetscSFDestroy(&osf)); 5656 PetscFunctionReturn(PETSC_SUCCESS); 5657 } 5658 5659 /* 5660 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5661 * This supports MPIAIJ and MAIJ 5662 * */ 5663 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5664 { 5665 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5666 Mat_SeqAIJ *p_oth; 5667 IS rows, map; 5668 PetscHMapI hamp; 5669 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5670 MPI_Comm comm; 5671 PetscSF sf, osf; 5672 PetscBool has; 5673 5674 PetscFunctionBegin; 5675 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5676 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5677 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5678 * and then create a submatrix (that often is an overlapping matrix) 5679 * */ 5680 if (reuse == MAT_INITIAL_MATRIX) { 5681 /* Use a hash table to figure out unique keys */ 5682 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5683 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5684 count = 0; 5685 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5686 for (i = 0; i < a->B->cmap->n; i++) { 5687 key = a->garray[i] / dof; 5688 PetscCall(PetscHMapIHas(hamp, key, &has)); 5689 if (!has) { 5690 mapping[i] = count; 5691 PetscCall(PetscHMapISet(hamp, key, count++)); 5692 } else { 5693 /* Current 'i' has the same value the previous step */ 5694 mapping[i] = count - 1; 5695 } 5696 } 5697 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5698 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5699 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5700 PetscCall(PetscCalloc1(htsize, &rowindices)); 5701 off = 0; 5702 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5703 PetscCall(PetscHMapIDestroy(&hamp)); 5704 PetscCall(PetscSortInt(htsize, rowindices)); 5705 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5706 /* In case, the matrix was already created but users want to recreate the matrix */ 5707 PetscCall(MatDestroy(P_oth)); 5708 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5709 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5710 PetscCall(ISDestroy(&map)); 5711 PetscCall(ISDestroy(&rows)); 5712 } else if (reuse == MAT_REUSE_MATRIX) { 5713 /* If matrix was already created, we simply update values using SF objects 5714 * that as attached to the matrix earlier. 5715 */ 5716 const PetscScalar *pd_a, *po_a; 5717 5718 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5719 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5720 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5721 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5722 /* Update values in place */ 5723 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5724 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5725 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5726 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5727 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5728 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5729 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5730 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5731 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5732 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5733 PetscFunctionReturn(PETSC_SUCCESS); 5734 } 5735 5736 /*@C 5737 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5738 5739 Collective 5740 5741 Input Parameters: 5742 + A - the first matrix in `MATMPIAIJ` format 5743 . B - the second matrix in `MATMPIAIJ` format 5744 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5745 5746 Output Parameters: 5747 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5748 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5749 - B_seq - the sequential matrix generated 5750 5751 Level: developer 5752 5753 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5754 @*/ 5755 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5756 { 5757 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5758 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5759 IS isrowb, iscolb; 5760 Mat *bseq = NULL; 5761 5762 PetscFunctionBegin; 5763 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5764 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5765 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5766 5767 if (scall == MAT_INITIAL_MATRIX) { 5768 start = A->cmap->rstart; 5769 cmap = a->garray; 5770 nzA = a->A->cmap->n; 5771 nzB = a->B->cmap->n; 5772 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5773 ncols = 0; 5774 for (i = 0; i < nzB; i++) { /* row < local row index */ 5775 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5776 else break; 5777 } 5778 imark = i; 5779 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5780 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5781 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5782 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5783 } else { 5784 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5785 isrowb = *rowb; 5786 iscolb = *colb; 5787 PetscCall(PetscMalloc1(1, &bseq)); 5788 bseq[0] = *B_seq; 5789 } 5790 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5791 *B_seq = bseq[0]; 5792 PetscCall(PetscFree(bseq)); 5793 if (!rowb) { 5794 PetscCall(ISDestroy(&isrowb)); 5795 } else { 5796 *rowb = isrowb; 5797 } 5798 if (!colb) { 5799 PetscCall(ISDestroy(&iscolb)); 5800 } else { 5801 *colb = iscolb; 5802 } 5803 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5804 PetscFunctionReturn(PETSC_SUCCESS); 5805 } 5806 5807 /* 5808 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5809 of the OFF-DIAGONAL portion of local A 5810 5811 Collective 5812 5813 Input Parameters: 5814 + A,B - the matrices in `MATMPIAIJ` format 5815 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5816 5817 Output Parameter: 5818 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5819 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5820 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5821 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5822 5823 Developer Note: 5824 This directly accesses information inside the VecScatter associated with the matrix-vector product 5825 for this matrix. This is not desirable.. 5826 5827 Level: developer 5828 5829 */ 5830 5831 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5832 { 5833 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5834 VecScatter ctx; 5835 MPI_Comm comm; 5836 const PetscMPIInt *rprocs, *sprocs; 5837 PetscMPIInt nrecvs, nsends; 5838 const PetscInt *srow, *rstarts, *sstarts; 5839 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5840 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5841 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5842 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5843 PetscMPIInt size, tag, rank, nreqs; 5844 5845 PetscFunctionBegin; 5846 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5847 PetscCallMPI(MPI_Comm_size(comm, &size)); 5848 5849 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5850 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5851 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5852 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5853 5854 if (size == 1) { 5855 startsj_s = NULL; 5856 bufa_ptr = NULL; 5857 *B_oth = NULL; 5858 PetscFunctionReturn(PETSC_SUCCESS); 5859 } 5860 5861 ctx = a->Mvctx; 5862 tag = ((PetscObject)ctx)->tag; 5863 5864 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5865 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5866 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5867 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5868 PetscCall(PetscMalloc1(nreqs, &reqs)); 5869 rwaits = reqs; 5870 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5871 5872 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5873 if (scall == MAT_INITIAL_MATRIX) { 5874 /* i-array */ 5875 /* post receives */ 5876 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5877 for (i = 0; i < nrecvs; i++) { 5878 rowlen = rvalues + rstarts[i] * rbs; 5879 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5880 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5881 } 5882 5883 /* pack the outgoing message */ 5884 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5885 5886 sstartsj[0] = 0; 5887 rstartsj[0] = 0; 5888 len = 0; /* total length of j or a array to be sent */ 5889 if (nsends) { 5890 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5891 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5892 } 5893 for (i = 0; i < nsends; i++) { 5894 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5895 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5896 for (j = 0; j < nrows; j++) { 5897 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5898 for (l = 0; l < sbs; l++) { 5899 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5900 5901 rowlen[j * sbs + l] = ncols; 5902 5903 len += ncols; 5904 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5905 } 5906 k++; 5907 } 5908 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5909 5910 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5911 } 5912 /* recvs and sends of i-array are completed */ 5913 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5914 PetscCall(PetscFree(svalues)); 5915 5916 /* allocate buffers for sending j and a arrays */ 5917 PetscCall(PetscMalloc1(len + 1, &bufj)); 5918 PetscCall(PetscMalloc1(len + 1, &bufa)); 5919 5920 /* create i-array of B_oth */ 5921 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5922 5923 b_othi[0] = 0; 5924 len = 0; /* total length of j or a array to be received */ 5925 k = 0; 5926 for (i = 0; i < nrecvs; i++) { 5927 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5928 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5929 for (j = 0; j < nrows; j++) { 5930 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5931 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5932 k++; 5933 } 5934 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5935 } 5936 PetscCall(PetscFree(rvalues)); 5937 5938 /* allocate space for j and a arrays of B_oth */ 5939 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5940 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5941 5942 /* j-array */ 5943 /* post receives of j-array */ 5944 for (i = 0; i < nrecvs; i++) { 5945 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5946 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5947 } 5948 5949 /* pack the outgoing message j-array */ 5950 if (nsends) k = sstarts[0]; 5951 for (i = 0; i < nsends; i++) { 5952 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5953 bufJ = bufj + sstartsj[i]; 5954 for (j = 0; j < nrows; j++) { 5955 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5956 for (ll = 0; ll < sbs; ll++) { 5957 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5958 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5959 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5960 } 5961 } 5962 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5963 } 5964 5965 /* recvs and sends of j-array are completed */ 5966 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5967 } else if (scall == MAT_REUSE_MATRIX) { 5968 sstartsj = *startsj_s; 5969 rstartsj = *startsj_r; 5970 bufa = *bufa_ptr; 5971 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5972 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5973 5974 /* a-array */ 5975 /* post receives of a-array */ 5976 for (i = 0; i < nrecvs; i++) { 5977 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5978 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5979 } 5980 5981 /* pack the outgoing message a-array */ 5982 if (nsends) k = sstarts[0]; 5983 for (i = 0; i < nsends; i++) { 5984 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5985 bufA = bufa + sstartsj[i]; 5986 for (j = 0; j < nrows; j++) { 5987 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5988 for (ll = 0; ll < sbs; ll++) { 5989 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5990 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5991 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5992 } 5993 } 5994 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5995 } 5996 /* recvs and sends of a-array are completed */ 5997 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5998 PetscCall(PetscFree(reqs)); 5999 6000 if (scall == MAT_INITIAL_MATRIX) { 6001 Mat_SeqAIJ *b_oth; 6002 6003 /* put together the new matrix */ 6004 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6005 6006 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6007 /* Since these are PETSc arrays, change flags to free them as necessary. */ 6008 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6009 b_oth->free_a = PETSC_TRUE; 6010 b_oth->free_ij = PETSC_TRUE; 6011 b_oth->nonew = 0; 6012 6013 PetscCall(PetscFree(bufj)); 6014 if (!startsj_s || !bufa_ptr) { 6015 PetscCall(PetscFree2(sstartsj, rstartsj)); 6016 PetscCall(PetscFree(bufa_ptr)); 6017 } else { 6018 *startsj_s = sstartsj; 6019 *startsj_r = rstartsj; 6020 *bufa_ptr = bufa; 6021 } 6022 } else if (scall == MAT_REUSE_MATRIX) { 6023 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6024 } 6025 6026 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6027 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6028 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6029 PetscFunctionReturn(PETSC_SUCCESS); 6030 } 6031 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6035 #if defined(PETSC_HAVE_MKL_SPARSE) 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6037 #endif 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6040 #if defined(PETSC_HAVE_ELEMENTAL) 6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6042 #endif 6043 #if defined(PETSC_HAVE_SCALAPACK) 6044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6045 #endif 6046 #if defined(PETSC_HAVE_HYPRE) 6047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6048 #endif 6049 #if defined(PETSC_HAVE_CUDA) 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6051 #endif 6052 #if defined(PETSC_HAVE_HIP) 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6054 #endif 6055 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6057 #endif 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6059 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6060 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6061 6062 /* 6063 Computes (B'*A')' since computing B*A directly is untenable 6064 6065 n p p 6066 [ ] [ ] [ ] 6067 m [ A ] * n [ B ] = m [ C ] 6068 [ ] [ ] [ ] 6069 6070 */ 6071 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6072 { 6073 Mat At, Bt, Ct; 6074 6075 PetscFunctionBegin; 6076 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6077 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6078 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6079 PetscCall(MatDestroy(&At)); 6080 PetscCall(MatDestroy(&Bt)); 6081 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6082 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6083 PetscCall(MatDestroy(&Ct)); 6084 PetscFunctionReturn(PETSC_SUCCESS); 6085 } 6086 6087 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6088 { 6089 PetscBool cisdense; 6090 6091 PetscFunctionBegin; 6092 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6093 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6094 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6095 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6096 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6097 PetscCall(MatSetUp(C)); 6098 6099 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6100 PetscFunctionReturn(PETSC_SUCCESS); 6101 } 6102 6103 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6104 { 6105 Mat_Product *product = C->product; 6106 Mat A = product->A, B = product->B; 6107 6108 PetscFunctionBegin; 6109 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6110 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6111 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6112 C->ops->productsymbolic = MatProductSymbolic_AB; 6113 PetscFunctionReturn(PETSC_SUCCESS); 6114 } 6115 6116 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6117 { 6118 Mat_Product *product = C->product; 6119 6120 PetscFunctionBegin; 6121 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6122 PetscFunctionReturn(PETSC_SUCCESS); 6123 } 6124 6125 /* 6126 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6127 6128 Input Parameters: 6129 6130 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6131 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6132 6133 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6134 6135 For Set1, j1[] contains column indices of the nonzeros. 6136 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6137 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6138 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6139 6140 Similar for Set2. 6141 6142 This routine merges the two sets of nonzeros row by row and removes repeats. 6143 6144 Output Parameters: (memory is allocated by the caller) 6145 6146 i[],j[]: the CSR of the merged matrix, which has m rows. 6147 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6148 imap2[]: similar to imap1[], but for Set2. 6149 Note we order nonzeros row-by-row and from left to right. 6150 */ 6151 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6152 { 6153 PetscInt r, m; /* Row index of mat */ 6154 PetscCount t, t1, t2, b1, e1, b2, e2; 6155 6156 PetscFunctionBegin; 6157 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6158 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6159 i[0] = 0; 6160 for (r = 0; r < m; r++) { /* Do row by row merging */ 6161 b1 = rowBegin1[r]; 6162 e1 = rowEnd1[r]; 6163 b2 = rowBegin2[r]; 6164 e2 = rowEnd2[r]; 6165 while (b1 < e1 && b2 < e2) { 6166 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6167 j[t] = j1[b1]; 6168 imap1[t1] = t; 6169 imap2[t2] = t; 6170 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6171 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6172 t1++; 6173 t2++; 6174 t++; 6175 } else if (j1[b1] < j2[b2]) { 6176 j[t] = j1[b1]; 6177 imap1[t1] = t; 6178 b1 += jmap1[t1 + 1] - jmap1[t1]; 6179 t1++; 6180 t++; 6181 } else { 6182 j[t] = j2[b2]; 6183 imap2[t2] = t; 6184 b2 += jmap2[t2 + 1] - jmap2[t2]; 6185 t2++; 6186 t++; 6187 } 6188 } 6189 /* Merge the remaining in either j1[] or j2[] */ 6190 while (b1 < e1) { 6191 j[t] = j1[b1]; 6192 imap1[t1] = t; 6193 b1 += jmap1[t1 + 1] - jmap1[t1]; 6194 t1++; 6195 t++; 6196 } 6197 while (b2 < e2) { 6198 j[t] = j2[b2]; 6199 imap2[t2] = t; 6200 b2 += jmap2[t2 + 1] - jmap2[t2]; 6201 t2++; 6202 t++; 6203 } 6204 PetscCall(PetscIntCast(t, i + r + 1)); 6205 } 6206 PetscFunctionReturn(PETSC_SUCCESS); 6207 } 6208 6209 /* 6210 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6211 6212 Input Parameters: 6213 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6214 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6215 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6216 6217 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6218 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6219 6220 Output Parameters: 6221 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6222 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6223 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6224 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6225 6226 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6227 Atot: number of entries belonging to the diagonal block. 6228 Annz: number of unique nonzeros belonging to the diagonal block. 6229 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6230 repeats (i.e., same 'i,j' pair). 6231 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6232 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6233 6234 Atot: number of entries belonging to the diagonal block 6235 Annz: number of unique nonzeros belonging to the diagonal block. 6236 6237 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6238 6239 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6240 */ 6241 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6242 { 6243 PetscInt cstart, cend, rstart, rend, row, col; 6244 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6245 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6246 PetscCount k, m, p, q, r, s, mid; 6247 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6248 6249 PetscFunctionBegin; 6250 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6251 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6252 m = rend - rstart; 6253 6254 /* Skip negative rows */ 6255 for (k = 0; k < n; k++) 6256 if (i[k] >= 0) break; 6257 6258 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6259 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6260 */ 6261 while (k < n) { 6262 row = i[k]; 6263 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6264 for (s = k; s < n; s++) 6265 if (i[s] != row) break; 6266 6267 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6268 for (p = k; p < s; p++) { 6269 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6270 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6271 } 6272 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6273 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6274 rowBegin[row - rstart] = k; 6275 rowMid[row - rstart] = mid; 6276 rowEnd[row - rstart] = s; 6277 6278 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6279 Atot += mid - k; 6280 Btot += s - mid; 6281 6282 /* Count unique nonzeros of this diag row */ 6283 for (p = k; p < mid;) { 6284 col = j[p]; 6285 do { 6286 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6287 p++; 6288 } while (p < mid && j[p] == col); 6289 Annz++; 6290 } 6291 6292 /* Count unique nonzeros of this offdiag row */ 6293 for (p = mid; p < s;) { 6294 col = j[p]; 6295 do { 6296 p++; 6297 } while (p < s && j[p] == col); 6298 Bnnz++; 6299 } 6300 k = s; 6301 } 6302 6303 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6304 PetscCall(PetscMalloc1(Atot, &Aperm)); 6305 PetscCall(PetscMalloc1(Btot, &Bperm)); 6306 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6307 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6308 6309 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6310 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6311 for (r = 0; r < m; r++) { 6312 k = rowBegin[r]; 6313 mid = rowMid[r]; 6314 s = rowEnd[r]; 6315 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6316 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6317 Atot += mid - k; 6318 Btot += s - mid; 6319 6320 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6321 for (p = k; p < mid;) { 6322 col = j[p]; 6323 q = p; 6324 do { 6325 p++; 6326 } while (p < mid && j[p] == col); 6327 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6328 Annz++; 6329 } 6330 6331 for (p = mid; p < s;) { 6332 col = j[p]; 6333 q = p; 6334 do { 6335 p++; 6336 } while (p < s && j[p] == col); 6337 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6338 Bnnz++; 6339 } 6340 } 6341 /* Output */ 6342 *Aperm_ = Aperm; 6343 *Annz_ = Annz; 6344 *Atot_ = Atot; 6345 *Ajmap_ = Ajmap; 6346 *Bperm_ = Bperm; 6347 *Bnnz_ = Bnnz; 6348 *Btot_ = Btot; 6349 *Bjmap_ = Bjmap; 6350 PetscFunctionReturn(PETSC_SUCCESS); 6351 } 6352 6353 /* 6354 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6355 6356 Input Parameters: 6357 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6358 nnz: number of unique nonzeros in the merged matrix 6359 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6360 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6361 6362 Output Parameter: (memory is allocated by the caller) 6363 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6364 6365 Example: 6366 nnz1 = 4 6367 nnz = 6 6368 imap = [1,3,4,5] 6369 jmap = [0,3,5,6,7] 6370 then, 6371 jmap_new = [0,0,3,3,5,6,7] 6372 */ 6373 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6374 { 6375 PetscCount k, p; 6376 6377 PetscFunctionBegin; 6378 jmap_new[0] = 0; 6379 p = nnz; /* p loops over jmap_new[] backwards */ 6380 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6381 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6382 } 6383 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6384 PetscFunctionReturn(PETSC_SUCCESS); 6385 } 6386 6387 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6388 { 6389 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6390 6391 PetscFunctionBegin; 6392 PetscCall(PetscSFDestroy(&coo->sf)); 6393 PetscCall(PetscFree(coo->Aperm1)); 6394 PetscCall(PetscFree(coo->Bperm1)); 6395 PetscCall(PetscFree(coo->Ajmap1)); 6396 PetscCall(PetscFree(coo->Bjmap1)); 6397 PetscCall(PetscFree(coo->Aimap2)); 6398 PetscCall(PetscFree(coo->Bimap2)); 6399 PetscCall(PetscFree(coo->Aperm2)); 6400 PetscCall(PetscFree(coo->Bperm2)); 6401 PetscCall(PetscFree(coo->Ajmap2)); 6402 PetscCall(PetscFree(coo->Bjmap2)); 6403 PetscCall(PetscFree(coo->Cperm1)); 6404 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6405 PetscCall(PetscFree(coo)); 6406 PetscFunctionReturn(PETSC_SUCCESS); 6407 } 6408 6409 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6410 { 6411 MPI_Comm comm; 6412 PetscMPIInt rank, size; 6413 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6414 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6415 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6416 PetscContainer container; 6417 MatCOOStruct_MPIAIJ *coo; 6418 6419 PetscFunctionBegin; 6420 PetscCall(PetscFree(mpiaij->garray)); 6421 PetscCall(VecDestroy(&mpiaij->lvec)); 6422 #if defined(PETSC_USE_CTABLE) 6423 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6424 #else 6425 PetscCall(PetscFree(mpiaij->colmap)); 6426 #endif 6427 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6428 mat->assembled = PETSC_FALSE; 6429 mat->was_assembled = PETSC_FALSE; 6430 6431 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6432 PetscCallMPI(MPI_Comm_size(comm, &size)); 6433 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6434 PetscCall(PetscLayoutSetUp(mat->rmap)); 6435 PetscCall(PetscLayoutSetUp(mat->cmap)); 6436 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6437 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6438 PetscCall(MatGetLocalSize(mat, &m, &n)); 6439 PetscCall(MatGetSize(mat, &M, &N)); 6440 6441 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6442 /* entries come first, then local rows, then remote rows. */ 6443 PetscCount n1 = coo_n, *perm1; 6444 PetscInt *i1 = coo_i, *j1 = coo_j; 6445 6446 PetscCall(PetscMalloc1(n1, &perm1)); 6447 for (k = 0; k < n1; k++) perm1[k] = k; 6448 6449 /* Manipulate indices so that entries with negative row or col indices will have smallest 6450 row indices, local entries will have greater but negative row indices, and remote entries 6451 will have positive row indices. 6452 */ 6453 for (k = 0; k < n1; k++) { 6454 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6455 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6456 else { 6457 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6458 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6459 } 6460 } 6461 6462 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6463 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6464 6465 /* Advance k to the first entry we need to take care of */ 6466 for (k = 0; k < n1; k++) 6467 if (i1[k] > PETSC_INT_MIN) break; 6468 PetscCount i1start = k; 6469 6470 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6471 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6472 6473 /* Send remote rows to their owner */ 6474 /* Find which rows should be sent to which remote ranks*/ 6475 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6476 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6477 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6478 const PetscInt *ranges; 6479 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6480 6481 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6482 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6483 for (k = rem; k < n1;) { 6484 PetscMPIInt owner; 6485 PetscInt firstRow, lastRow; 6486 6487 /* Locate a row range */ 6488 firstRow = i1[k]; /* first row of this owner */ 6489 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6490 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6491 6492 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6493 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6494 6495 /* All entries in [k,p) belong to this remote owner */ 6496 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6497 PetscMPIInt *sendto2; 6498 PetscInt *nentries2; 6499 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6500 6501 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6502 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6503 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6504 PetscCall(PetscFree2(sendto, nentries2)); 6505 sendto = sendto2; 6506 nentries = nentries2; 6507 maxNsend = maxNsend2; 6508 } 6509 sendto[nsend] = owner; 6510 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6511 nsend++; 6512 k = p; 6513 } 6514 6515 /* Build 1st SF to know offsets on remote to send data */ 6516 PetscSF sf1; 6517 PetscInt nroots = 1, nroots2 = 0; 6518 PetscInt nleaves = nsend, nleaves2 = 0; 6519 PetscInt *offsets; 6520 PetscSFNode *iremote; 6521 6522 PetscCall(PetscSFCreate(comm, &sf1)); 6523 PetscCall(PetscMalloc1(nsend, &iremote)); 6524 PetscCall(PetscMalloc1(nsend, &offsets)); 6525 for (k = 0; k < nsend; k++) { 6526 iremote[k].rank = sendto[k]; 6527 iremote[k].index = 0; 6528 nleaves2 += nentries[k]; 6529 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6530 } 6531 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6532 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6533 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6534 PetscCall(PetscSFDestroy(&sf1)); 6535 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6536 6537 /* Build 2nd SF to send remote COOs to their owner */ 6538 PetscSF sf2; 6539 nroots = nroots2; 6540 nleaves = nleaves2; 6541 PetscCall(PetscSFCreate(comm, &sf2)); 6542 PetscCall(PetscSFSetFromOptions(sf2)); 6543 PetscCall(PetscMalloc1(nleaves, &iremote)); 6544 p = 0; 6545 for (k = 0; k < nsend; k++) { 6546 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6547 for (q = 0; q < nentries[k]; q++, p++) { 6548 iremote[p].rank = sendto[k]; 6549 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6550 } 6551 } 6552 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6553 6554 /* Send the remote COOs to their owner */ 6555 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6556 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6557 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6558 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6559 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6560 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6561 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6562 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6563 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6564 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6565 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6566 6567 PetscCall(PetscFree(offsets)); 6568 PetscCall(PetscFree2(sendto, nentries)); 6569 6570 /* Sort received COOs by row along with the permutation array */ 6571 for (k = 0; k < n2; k++) perm2[k] = k; 6572 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6573 6574 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6575 PetscCount *Cperm1; 6576 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6577 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6578 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6579 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6580 6581 /* Support for HYPRE matrices, kind of a hack. 6582 Swap min column with diagonal so that diagonal values will go first */ 6583 PetscBool hypre; 6584 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6585 if (hypre) { 6586 PetscInt *minj; 6587 PetscBT hasdiag; 6588 6589 PetscCall(PetscBTCreate(m, &hasdiag)); 6590 PetscCall(PetscMalloc1(m, &minj)); 6591 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6592 for (k = i1start; k < rem; k++) { 6593 if (j1[k] < cstart || j1[k] >= cend) continue; 6594 const PetscInt rindex = i1[k] - rstart; 6595 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6596 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6597 } 6598 for (k = 0; k < n2; k++) { 6599 if (j2[k] < cstart || j2[k] >= cend) continue; 6600 const PetscInt rindex = i2[k] - rstart; 6601 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6602 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6603 } 6604 for (k = i1start; k < rem; k++) { 6605 const PetscInt rindex = i1[k] - rstart; 6606 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6607 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6608 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6609 } 6610 for (k = 0; k < n2; k++) { 6611 const PetscInt rindex = i2[k] - rstart; 6612 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6613 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6614 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6615 } 6616 PetscCall(PetscBTDestroy(&hasdiag)); 6617 PetscCall(PetscFree(minj)); 6618 } 6619 6620 /* Split local COOs and received COOs into diag/offdiag portions */ 6621 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6622 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6623 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6624 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6625 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6626 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6627 6628 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6629 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6630 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6631 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6632 6633 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6634 PetscInt *Ai, *Bi; 6635 PetscInt *Aj, *Bj; 6636 6637 PetscCall(PetscMalloc1(m + 1, &Ai)); 6638 PetscCall(PetscMalloc1(m + 1, &Bi)); 6639 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6640 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6641 6642 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6643 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6644 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6645 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6646 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6647 6648 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6649 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6650 6651 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6652 /* expect nonzeros in A/B most likely have local contributing entries */ 6653 PetscInt Annz = Ai[m]; 6654 PetscInt Bnnz = Bi[m]; 6655 PetscCount *Ajmap1_new, *Bjmap1_new; 6656 6657 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6658 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6659 6660 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6661 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6662 6663 PetscCall(PetscFree(Aimap1)); 6664 PetscCall(PetscFree(Ajmap1)); 6665 PetscCall(PetscFree(Bimap1)); 6666 PetscCall(PetscFree(Bjmap1)); 6667 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6668 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6669 PetscCall(PetscFree(perm1)); 6670 PetscCall(PetscFree3(i2, j2, perm2)); 6671 6672 Ajmap1 = Ajmap1_new; 6673 Bjmap1 = Bjmap1_new; 6674 6675 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6676 if (Annz < Annz1 + Annz2) { 6677 PetscInt *Aj_new; 6678 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6679 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6680 PetscCall(PetscFree(Aj)); 6681 Aj = Aj_new; 6682 } 6683 6684 if (Bnnz < Bnnz1 + Bnnz2) { 6685 PetscInt *Bj_new; 6686 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6687 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6688 PetscCall(PetscFree(Bj)); 6689 Bj = Bj_new; 6690 } 6691 6692 /* Create new submatrices for on-process and off-process coupling */ 6693 PetscScalar *Aa, *Ba; 6694 MatType rtype; 6695 Mat_SeqAIJ *a, *b; 6696 PetscObjectState state; 6697 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6698 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6699 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6700 if (cstart) { 6701 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6702 } 6703 6704 PetscCall(MatGetRootType_Private(mat, &rtype)); 6705 6706 MatSeqXAIJGetOptions_Private(mpiaij->A); 6707 PetscCall(MatDestroy(&mpiaij->A)); 6708 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6709 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6710 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6711 6712 MatSeqXAIJGetOptions_Private(mpiaij->B); 6713 PetscCall(MatDestroy(&mpiaij->B)); 6714 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6715 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6716 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6717 6718 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6719 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6720 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6721 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6722 6723 a = (Mat_SeqAIJ *)mpiaij->A->data; 6724 b = (Mat_SeqAIJ *)mpiaij->B->data; 6725 a->free_a = PETSC_TRUE; 6726 a->free_ij = PETSC_TRUE; 6727 b->free_a = PETSC_TRUE; 6728 b->free_ij = PETSC_TRUE; 6729 a->maxnz = a->nz; 6730 b->maxnz = b->nz; 6731 6732 /* conversion must happen AFTER multiply setup */ 6733 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6734 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6735 PetscCall(VecDestroy(&mpiaij->lvec)); 6736 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6737 6738 // Put the COO struct in a container and then attach that to the matrix 6739 PetscCall(PetscMalloc1(1, &coo)); 6740 coo->n = coo_n; 6741 coo->sf = sf2; 6742 coo->sendlen = nleaves; 6743 coo->recvlen = nroots; 6744 coo->Annz = Annz; 6745 coo->Bnnz = Bnnz; 6746 coo->Annz2 = Annz2; 6747 coo->Bnnz2 = Bnnz2; 6748 coo->Atot1 = Atot1; 6749 coo->Atot2 = Atot2; 6750 coo->Btot1 = Btot1; 6751 coo->Btot2 = Btot2; 6752 coo->Ajmap1 = Ajmap1; 6753 coo->Aperm1 = Aperm1; 6754 coo->Bjmap1 = Bjmap1; 6755 coo->Bperm1 = Bperm1; 6756 coo->Aimap2 = Aimap2; 6757 coo->Ajmap2 = Ajmap2; 6758 coo->Aperm2 = Aperm2; 6759 coo->Bimap2 = Bimap2; 6760 coo->Bjmap2 = Bjmap2; 6761 coo->Bperm2 = Bperm2; 6762 coo->Cperm1 = Cperm1; 6763 // Allocate in preallocation. If not used, it has zero cost on host 6764 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6765 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6766 PetscCall(PetscContainerSetPointer(container, coo)); 6767 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6768 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6769 PetscCall(PetscContainerDestroy(&container)); 6770 PetscFunctionReturn(PETSC_SUCCESS); 6771 } 6772 6773 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6774 { 6775 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6776 Mat A = mpiaij->A, B = mpiaij->B; 6777 PetscScalar *Aa, *Ba; 6778 PetscScalar *sendbuf, *recvbuf; 6779 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6780 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6781 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6782 const PetscCount *Cperm1; 6783 PetscContainer container; 6784 MatCOOStruct_MPIAIJ *coo; 6785 6786 PetscFunctionBegin; 6787 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6788 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6789 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6790 sendbuf = coo->sendbuf; 6791 recvbuf = coo->recvbuf; 6792 Ajmap1 = coo->Ajmap1; 6793 Ajmap2 = coo->Ajmap2; 6794 Aimap2 = coo->Aimap2; 6795 Bjmap1 = coo->Bjmap1; 6796 Bjmap2 = coo->Bjmap2; 6797 Bimap2 = coo->Bimap2; 6798 Aperm1 = coo->Aperm1; 6799 Aperm2 = coo->Aperm2; 6800 Bperm1 = coo->Bperm1; 6801 Bperm2 = coo->Bperm2; 6802 Cperm1 = coo->Cperm1; 6803 6804 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6805 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6806 6807 /* Pack entries to be sent to remote */ 6808 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6809 6810 /* Send remote entries to their owner and overlap the communication with local computation */ 6811 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6812 /* Add local entries to A and B */ 6813 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6814 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6815 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6816 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6817 } 6818 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6819 PetscScalar sum = 0.0; 6820 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6821 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6822 } 6823 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6824 6825 /* Add received remote entries to A and B */ 6826 for (PetscCount i = 0; i < coo->Annz2; i++) { 6827 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6828 } 6829 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6830 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6831 } 6832 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6833 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6834 PetscFunctionReturn(PETSC_SUCCESS); 6835 } 6836 6837 /*MC 6838 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6839 6840 Options Database Keys: 6841 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6842 6843 Level: beginner 6844 6845 Notes: 6846 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6847 in this case the values associated with the rows and columns one passes in are set to zero 6848 in the matrix 6849 6850 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6851 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6852 6853 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6854 M*/ 6855 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6856 { 6857 Mat_MPIAIJ *b; 6858 PetscMPIInt size; 6859 6860 PetscFunctionBegin; 6861 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6862 6863 PetscCall(PetscNew(&b)); 6864 B->data = (void *)b; 6865 B->ops[0] = MatOps_Values; 6866 B->assembled = PETSC_FALSE; 6867 B->insertmode = NOT_SET_VALUES; 6868 b->size = size; 6869 6870 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6871 6872 /* build cache for off array entries formed */ 6873 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6874 6875 b->donotstash = PETSC_FALSE; 6876 b->colmap = NULL; 6877 b->garray = NULL; 6878 b->roworiented = PETSC_TRUE; 6879 6880 /* stuff used for matrix vector multiply */ 6881 b->lvec = NULL; 6882 b->Mvctx = NULL; 6883 6884 /* stuff for MatGetRow() */ 6885 b->rowindices = NULL; 6886 b->rowvalues = NULL; 6887 b->getrowactive = PETSC_FALSE; 6888 6889 /* flexible pointer used in CUSPARSE classes */ 6890 b->spptr = NULL; 6891 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6902 #if defined(PETSC_HAVE_CUDA) 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6904 #endif 6905 #if defined(PETSC_HAVE_HIP) 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6907 #endif 6908 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6910 #endif 6911 #if defined(PETSC_HAVE_MKL_SPARSE) 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6913 #endif 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6918 #if defined(PETSC_HAVE_ELEMENTAL) 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6920 #endif 6921 #if defined(PETSC_HAVE_SCALAPACK) 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6923 #endif 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6926 #if defined(PETSC_HAVE_HYPRE) 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6929 #endif 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6934 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6935 PetscFunctionReturn(PETSC_SUCCESS); 6936 } 6937 6938 /*@ 6939 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6940 and "off-diagonal" part of the matrix in CSR format. 6941 6942 Collective 6943 6944 Input Parameters: 6945 + comm - MPI communicator 6946 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6947 . n - This value should be the same as the local size used in creating the 6948 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6949 calculated if `N` is given) For square matrices `n` is almost always `m`. 6950 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6951 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6952 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6953 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6954 . a - matrix values 6955 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6956 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6957 - oa - matrix values 6958 6959 Output Parameter: 6960 . mat - the matrix 6961 6962 Level: advanced 6963 6964 Notes: 6965 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6966 must free the arrays once the matrix has been destroyed and not before. 6967 6968 The `i` and `j` indices are 0 based 6969 6970 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6971 6972 This sets local rows and cannot be used to set off-processor values. 6973 6974 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6975 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6976 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6977 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6978 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6979 communication if it is known that only local entries will be set. 6980 6981 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6982 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6983 @*/ 6984 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6985 { 6986 Mat_MPIAIJ *maij; 6987 6988 PetscFunctionBegin; 6989 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6990 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6991 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6992 PetscCall(MatCreate(comm, mat)); 6993 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6994 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6995 maij = (Mat_MPIAIJ *)(*mat)->data; 6996 6997 (*mat)->preallocated = PETSC_TRUE; 6998 6999 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7000 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7001 7002 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7003 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7004 7005 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7006 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7007 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7008 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7009 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7010 PetscFunctionReturn(PETSC_SUCCESS); 7011 } 7012 7013 typedef struct { 7014 Mat *mp; /* intermediate products */ 7015 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7016 PetscInt cp; /* number of intermediate products */ 7017 7018 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7019 PetscInt *startsj_s, *startsj_r; 7020 PetscScalar *bufa; 7021 Mat P_oth; 7022 7023 /* may take advantage of merging product->B */ 7024 Mat Bloc; /* B-local by merging diag and off-diag */ 7025 7026 /* cusparse does not have support to split between symbolic and numeric phases. 7027 When api_user is true, we don't need to update the numerical values 7028 of the temporary storage */ 7029 PetscBool reusesym; 7030 7031 /* support for COO values insertion */ 7032 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7033 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7034 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7035 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 7036 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7037 PetscMemType mtype; 7038 7039 /* customization */ 7040 PetscBool abmerge; 7041 PetscBool P_oth_bind; 7042 } MatMatMPIAIJBACKEND; 7043 7044 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7045 { 7046 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7047 PetscInt i; 7048 7049 PetscFunctionBegin; 7050 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7051 PetscCall(PetscFree(mmdata->bufa)); 7052 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7053 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7054 PetscCall(MatDestroy(&mmdata->P_oth)); 7055 PetscCall(MatDestroy(&mmdata->Bloc)); 7056 PetscCall(PetscSFDestroy(&mmdata->sf)); 7057 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7058 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7059 PetscCall(PetscFree(mmdata->own[0])); 7060 PetscCall(PetscFree(mmdata->own)); 7061 PetscCall(PetscFree(mmdata->off[0])); 7062 PetscCall(PetscFree(mmdata->off)); 7063 PetscCall(PetscFree(mmdata)); 7064 PetscFunctionReturn(PETSC_SUCCESS); 7065 } 7066 7067 /* Copy selected n entries with indices in idx[] of A to v[]. 7068 If idx is NULL, copy the whole data array of A to v[] 7069 */ 7070 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7071 { 7072 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7073 7074 PetscFunctionBegin; 7075 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7076 if (f) { 7077 PetscCall((*f)(A, n, idx, v)); 7078 } else { 7079 const PetscScalar *vv; 7080 7081 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7082 if (n && idx) { 7083 PetscScalar *w = v; 7084 const PetscInt *oi = idx; 7085 PetscInt j; 7086 7087 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7088 } else { 7089 PetscCall(PetscArraycpy(v, vv, n)); 7090 } 7091 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7092 } 7093 PetscFunctionReturn(PETSC_SUCCESS); 7094 } 7095 7096 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7097 { 7098 MatMatMPIAIJBACKEND *mmdata; 7099 PetscInt i, n_d, n_o; 7100 7101 PetscFunctionBegin; 7102 MatCheckProduct(C, 1); 7103 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7104 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7105 if (!mmdata->reusesym) { /* update temporary matrices */ 7106 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7107 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7108 } 7109 mmdata->reusesym = PETSC_FALSE; 7110 7111 for (i = 0; i < mmdata->cp; i++) { 7112 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7113 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7114 } 7115 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7116 PetscInt noff; 7117 7118 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7119 if (mmdata->mptmp[i]) continue; 7120 if (noff) { 7121 PetscInt nown; 7122 7123 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7124 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7125 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7126 n_o += noff; 7127 n_d += nown; 7128 } else { 7129 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7130 7131 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7132 n_d += mm->nz; 7133 } 7134 } 7135 if (mmdata->hasoffproc) { /* offprocess insertion */ 7136 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7137 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7138 } 7139 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7140 PetscFunctionReturn(PETSC_SUCCESS); 7141 } 7142 7143 /* Support for Pt * A, A * P, or Pt * A * P */ 7144 #define MAX_NUMBER_INTERMEDIATE 4 7145 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7146 { 7147 Mat_Product *product = C->product; 7148 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7149 Mat_MPIAIJ *a, *p; 7150 MatMatMPIAIJBACKEND *mmdata; 7151 ISLocalToGlobalMapping P_oth_l2g = NULL; 7152 IS glob = NULL; 7153 const char *prefix; 7154 char pprefix[256]; 7155 const PetscInt *globidx, *P_oth_idx; 7156 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7157 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7158 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7159 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7160 /* a base offset; type-2: sparse with a local to global map table */ 7161 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7162 7163 MatProductType ptype; 7164 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7165 PetscMPIInt size; 7166 7167 PetscFunctionBegin; 7168 MatCheckProduct(C, 1); 7169 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7170 ptype = product->type; 7171 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7172 ptype = MATPRODUCT_AB; 7173 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7174 } 7175 switch (ptype) { 7176 case MATPRODUCT_AB: 7177 A = product->A; 7178 P = product->B; 7179 m = A->rmap->n; 7180 n = P->cmap->n; 7181 M = A->rmap->N; 7182 N = P->cmap->N; 7183 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7184 break; 7185 case MATPRODUCT_AtB: 7186 P = product->A; 7187 A = product->B; 7188 m = P->cmap->n; 7189 n = A->cmap->n; 7190 M = P->cmap->N; 7191 N = A->cmap->N; 7192 hasoffproc = PETSC_TRUE; 7193 break; 7194 case MATPRODUCT_PtAP: 7195 A = product->A; 7196 P = product->B; 7197 m = P->cmap->n; 7198 n = P->cmap->n; 7199 M = P->cmap->N; 7200 N = P->cmap->N; 7201 hasoffproc = PETSC_TRUE; 7202 break; 7203 default: 7204 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7205 } 7206 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7207 if (size == 1) hasoffproc = PETSC_FALSE; 7208 7209 /* defaults */ 7210 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7211 mp[i] = NULL; 7212 mptmp[i] = PETSC_FALSE; 7213 rmapt[i] = -1; 7214 cmapt[i] = -1; 7215 rmapa[i] = NULL; 7216 cmapa[i] = NULL; 7217 } 7218 7219 /* customization */ 7220 PetscCall(PetscNew(&mmdata)); 7221 mmdata->reusesym = product->api_user; 7222 if (ptype == MATPRODUCT_AB) { 7223 if (product->api_user) { 7224 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7225 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7226 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7227 PetscOptionsEnd(); 7228 } else { 7229 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7230 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7231 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7232 PetscOptionsEnd(); 7233 } 7234 } else if (ptype == MATPRODUCT_PtAP) { 7235 if (product->api_user) { 7236 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7237 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7238 PetscOptionsEnd(); 7239 } else { 7240 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7241 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7242 PetscOptionsEnd(); 7243 } 7244 } 7245 a = (Mat_MPIAIJ *)A->data; 7246 p = (Mat_MPIAIJ *)P->data; 7247 PetscCall(MatSetSizes(C, m, n, M, N)); 7248 PetscCall(PetscLayoutSetUp(C->rmap)); 7249 PetscCall(PetscLayoutSetUp(C->cmap)); 7250 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7251 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7252 7253 cp = 0; 7254 switch (ptype) { 7255 case MATPRODUCT_AB: /* A * P */ 7256 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7257 7258 /* A_diag * P_local (merged or not) */ 7259 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7260 /* P is product->B */ 7261 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7262 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7263 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7264 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7265 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7266 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7267 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7268 mp[cp]->product->api_user = product->api_user; 7269 PetscCall(MatProductSetFromOptions(mp[cp])); 7270 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7271 PetscCall(ISGetIndices(glob, &globidx)); 7272 rmapt[cp] = 1; 7273 cmapt[cp] = 2; 7274 cmapa[cp] = globidx; 7275 mptmp[cp] = PETSC_FALSE; 7276 cp++; 7277 } else { /* A_diag * P_diag and A_diag * P_off */ 7278 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7279 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7280 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7281 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7282 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7283 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7284 mp[cp]->product->api_user = product->api_user; 7285 PetscCall(MatProductSetFromOptions(mp[cp])); 7286 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7287 rmapt[cp] = 1; 7288 cmapt[cp] = 1; 7289 mptmp[cp] = PETSC_FALSE; 7290 cp++; 7291 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7292 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7293 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7294 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7295 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7296 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7297 mp[cp]->product->api_user = product->api_user; 7298 PetscCall(MatProductSetFromOptions(mp[cp])); 7299 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7300 rmapt[cp] = 1; 7301 cmapt[cp] = 2; 7302 cmapa[cp] = p->garray; 7303 mptmp[cp] = PETSC_FALSE; 7304 cp++; 7305 } 7306 7307 /* A_off * P_other */ 7308 if (mmdata->P_oth) { 7309 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7310 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7311 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7312 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7313 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7314 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7315 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7316 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7317 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7318 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7319 mp[cp]->product->api_user = product->api_user; 7320 PetscCall(MatProductSetFromOptions(mp[cp])); 7321 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7322 rmapt[cp] = 1; 7323 cmapt[cp] = 2; 7324 cmapa[cp] = P_oth_idx; 7325 mptmp[cp] = PETSC_FALSE; 7326 cp++; 7327 } 7328 break; 7329 7330 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7331 /* A is product->B */ 7332 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7333 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7334 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7335 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7336 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7337 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7338 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7339 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7340 mp[cp]->product->api_user = product->api_user; 7341 PetscCall(MatProductSetFromOptions(mp[cp])); 7342 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7343 PetscCall(ISGetIndices(glob, &globidx)); 7344 rmapt[cp] = 2; 7345 rmapa[cp] = globidx; 7346 cmapt[cp] = 2; 7347 cmapa[cp] = globidx; 7348 mptmp[cp] = PETSC_FALSE; 7349 cp++; 7350 } else { 7351 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7352 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7353 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7354 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7355 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7356 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7357 mp[cp]->product->api_user = product->api_user; 7358 PetscCall(MatProductSetFromOptions(mp[cp])); 7359 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7360 PetscCall(ISGetIndices(glob, &globidx)); 7361 rmapt[cp] = 1; 7362 cmapt[cp] = 2; 7363 cmapa[cp] = globidx; 7364 mptmp[cp] = PETSC_FALSE; 7365 cp++; 7366 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7367 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7368 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7369 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7370 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7371 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7372 mp[cp]->product->api_user = product->api_user; 7373 PetscCall(MatProductSetFromOptions(mp[cp])); 7374 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7375 rmapt[cp] = 2; 7376 rmapa[cp] = p->garray; 7377 cmapt[cp] = 2; 7378 cmapa[cp] = globidx; 7379 mptmp[cp] = PETSC_FALSE; 7380 cp++; 7381 } 7382 break; 7383 case MATPRODUCT_PtAP: 7384 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7385 /* P is product->B */ 7386 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7387 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7388 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7389 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7390 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7391 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7392 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7393 mp[cp]->product->api_user = product->api_user; 7394 PetscCall(MatProductSetFromOptions(mp[cp])); 7395 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7396 PetscCall(ISGetIndices(glob, &globidx)); 7397 rmapt[cp] = 2; 7398 rmapa[cp] = globidx; 7399 cmapt[cp] = 2; 7400 cmapa[cp] = globidx; 7401 mptmp[cp] = PETSC_FALSE; 7402 cp++; 7403 if (mmdata->P_oth) { 7404 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7405 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7406 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7407 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7408 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7409 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7410 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7411 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7412 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7413 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7414 mp[cp]->product->api_user = product->api_user; 7415 PetscCall(MatProductSetFromOptions(mp[cp])); 7416 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7417 mptmp[cp] = PETSC_TRUE; 7418 cp++; 7419 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7420 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7421 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7422 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7423 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7424 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7425 mp[cp]->product->api_user = product->api_user; 7426 PetscCall(MatProductSetFromOptions(mp[cp])); 7427 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7428 rmapt[cp] = 2; 7429 rmapa[cp] = globidx; 7430 cmapt[cp] = 2; 7431 cmapa[cp] = P_oth_idx; 7432 mptmp[cp] = PETSC_FALSE; 7433 cp++; 7434 } 7435 break; 7436 default: 7437 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7438 } 7439 /* sanity check */ 7440 if (size > 1) 7441 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7442 7443 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7444 for (i = 0; i < cp; i++) { 7445 mmdata->mp[i] = mp[i]; 7446 mmdata->mptmp[i] = mptmp[i]; 7447 } 7448 mmdata->cp = cp; 7449 C->product->data = mmdata; 7450 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7451 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7452 7453 /* memory type */ 7454 mmdata->mtype = PETSC_MEMTYPE_HOST; 7455 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7456 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7457 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7458 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7459 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7460 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7461 7462 /* prepare coo coordinates for values insertion */ 7463 7464 /* count total nonzeros of those intermediate seqaij Mats 7465 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7466 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7467 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7468 */ 7469 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7470 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7471 if (mptmp[cp]) continue; 7472 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7473 const PetscInt *rmap = rmapa[cp]; 7474 const PetscInt mr = mp[cp]->rmap->n; 7475 const PetscInt rs = C->rmap->rstart; 7476 const PetscInt re = C->rmap->rend; 7477 const PetscInt *ii = mm->i; 7478 for (i = 0; i < mr; i++) { 7479 const PetscInt gr = rmap[i]; 7480 const PetscInt nz = ii[i + 1] - ii[i]; 7481 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7482 else ncoo_oown += nz; /* this row is local */ 7483 } 7484 } else ncoo_d += mm->nz; 7485 } 7486 7487 /* 7488 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7489 7490 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7491 7492 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7493 7494 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7495 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7496 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7497 7498 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7499 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7500 */ 7501 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7502 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7503 7504 /* gather (i,j) of nonzeros inserted by remote procs */ 7505 if (hasoffproc) { 7506 PetscSF msf; 7507 PetscInt ncoo2, *coo_i2, *coo_j2; 7508 7509 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7510 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7511 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7512 7513 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7514 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7515 PetscInt *idxoff = mmdata->off[cp]; 7516 PetscInt *idxown = mmdata->own[cp]; 7517 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7518 const PetscInt *rmap = rmapa[cp]; 7519 const PetscInt *cmap = cmapa[cp]; 7520 const PetscInt *ii = mm->i; 7521 PetscInt *coi = coo_i + ncoo_o; 7522 PetscInt *coj = coo_j + ncoo_o; 7523 const PetscInt mr = mp[cp]->rmap->n; 7524 const PetscInt rs = C->rmap->rstart; 7525 const PetscInt re = C->rmap->rend; 7526 const PetscInt cs = C->cmap->rstart; 7527 for (i = 0; i < mr; i++) { 7528 const PetscInt *jj = mm->j + ii[i]; 7529 const PetscInt gr = rmap[i]; 7530 const PetscInt nz = ii[i + 1] - ii[i]; 7531 if (gr < rs || gr >= re) { /* this is an offproc row */ 7532 for (j = ii[i]; j < ii[i + 1]; j++) { 7533 *coi++ = gr; 7534 *idxoff++ = j; 7535 } 7536 if (!cmapt[cp]) { /* already global */ 7537 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7538 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7539 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7540 } else { /* offdiag */ 7541 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7542 } 7543 ncoo_o += nz; 7544 } else { /* this is a local row */ 7545 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7546 } 7547 } 7548 } 7549 mmdata->off[cp + 1] = idxoff; 7550 mmdata->own[cp + 1] = idxown; 7551 } 7552 7553 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7554 PetscInt incoo_o; 7555 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7556 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7557 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7558 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7559 ncoo = ncoo_d + ncoo_oown + ncoo2; 7560 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7561 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7562 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7563 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7564 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7565 PetscCall(PetscFree2(coo_i, coo_j)); 7566 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7567 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7568 coo_i = coo_i2; 7569 coo_j = coo_j2; 7570 } else { /* no offproc values insertion */ 7571 ncoo = ncoo_d; 7572 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7573 7574 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7575 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7576 PetscCall(PetscSFSetUp(mmdata->sf)); 7577 } 7578 mmdata->hasoffproc = hasoffproc; 7579 7580 /* gather (i,j) of nonzeros inserted locally */ 7581 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7582 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7583 PetscInt *coi = coo_i + ncoo_d; 7584 PetscInt *coj = coo_j + ncoo_d; 7585 const PetscInt *jj = mm->j; 7586 const PetscInt *ii = mm->i; 7587 const PetscInt *cmap = cmapa[cp]; 7588 const PetscInt *rmap = rmapa[cp]; 7589 const PetscInt mr = mp[cp]->rmap->n; 7590 const PetscInt rs = C->rmap->rstart; 7591 const PetscInt re = C->rmap->rend; 7592 const PetscInt cs = C->cmap->rstart; 7593 7594 if (mptmp[cp]) continue; 7595 if (rmapt[cp] == 1) { /* consecutive rows */ 7596 /* fill coo_i */ 7597 for (i = 0; i < mr; i++) { 7598 const PetscInt gr = i + rs; 7599 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7600 } 7601 /* fill coo_j */ 7602 if (!cmapt[cp]) { /* type-0, already global */ 7603 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7604 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7605 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7606 } else { /* type-2, local to global for sparse columns */ 7607 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7608 } 7609 ncoo_d += mm->nz; 7610 } else if (rmapt[cp] == 2) { /* sparse rows */ 7611 for (i = 0; i < mr; i++) { 7612 const PetscInt *jj = mm->j + ii[i]; 7613 const PetscInt gr = rmap[i]; 7614 const PetscInt nz = ii[i + 1] - ii[i]; 7615 if (gr >= rs && gr < re) { /* local rows */ 7616 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7617 if (!cmapt[cp]) { /* type-0, already global */ 7618 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7619 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7620 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7621 } else { /* type-2, local to global for sparse columns */ 7622 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7623 } 7624 ncoo_d += nz; 7625 } 7626 } 7627 } 7628 } 7629 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7630 PetscCall(ISDestroy(&glob)); 7631 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7632 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7633 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7634 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7635 7636 /* preallocate with COO data */ 7637 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7638 PetscCall(PetscFree2(coo_i, coo_j)); 7639 PetscFunctionReturn(PETSC_SUCCESS); 7640 } 7641 7642 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7643 { 7644 Mat_Product *product = mat->product; 7645 #if defined(PETSC_HAVE_DEVICE) 7646 PetscBool match = PETSC_FALSE; 7647 PetscBool usecpu = PETSC_FALSE; 7648 #else 7649 PetscBool match = PETSC_TRUE; 7650 #endif 7651 7652 PetscFunctionBegin; 7653 MatCheckProduct(mat, 1); 7654 #if defined(PETSC_HAVE_DEVICE) 7655 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7656 if (match) { /* we can always fallback to the CPU if requested */ 7657 switch (product->type) { 7658 case MATPRODUCT_AB: 7659 if (product->api_user) { 7660 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7661 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7662 PetscOptionsEnd(); 7663 } else { 7664 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7665 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7666 PetscOptionsEnd(); 7667 } 7668 break; 7669 case MATPRODUCT_AtB: 7670 if (product->api_user) { 7671 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7672 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7673 PetscOptionsEnd(); 7674 } else { 7675 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7676 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7677 PetscOptionsEnd(); 7678 } 7679 break; 7680 case MATPRODUCT_PtAP: 7681 if (product->api_user) { 7682 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7683 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7684 PetscOptionsEnd(); 7685 } else { 7686 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7687 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7688 PetscOptionsEnd(); 7689 } 7690 break; 7691 default: 7692 break; 7693 } 7694 match = (PetscBool)!usecpu; 7695 } 7696 #endif 7697 if (match) { 7698 switch (product->type) { 7699 case MATPRODUCT_AB: 7700 case MATPRODUCT_AtB: 7701 case MATPRODUCT_PtAP: 7702 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7703 break; 7704 default: 7705 break; 7706 } 7707 } 7708 /* fallback to MPIAIJ ops */ 7709 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7710 PetscFunctionReturn(PETSC_SUCCESS); 7711 } 7712 7713 /* 7714 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7715 7716 n - the number of block indices in cc[] 7717 cc - the block indices (must be large enough to contain the indices) 7718 */ 7719 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7720 { 7721 PetscInt cnt = -1, nidx, j; 7722 const PetscInt *idx; 7723 7724 PetscFunctionBegin; 7725 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7726 if (nidx) { 7727 cnt = 0; 7728 cc[cnt] = idx[0] / bs; 7729 for (j = 1; j < nidx; j++) { 7730 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7731 } 7732 } 7733 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7734 *n = cnt + 1; 7735 PetscFunctionReturn(PETSC_SUCCESS); 7736 } 7737 7738 /* 7739 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7740 7741 ncollapsed - the number of block indices 7742 collapsed - the block indices (must be large enough to contain the indices) 7743 */ 7744 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7745 { 7746 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7747 7748 PetscFunctionBegin; 7749 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7750 for (i = start + 1; i < start + bs; i++) { 7751 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7752 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7753 cprevtmp = cprev; 7754 cprev = merged; 7755 merged = cprevtmp; 7756 } 7757 *ncollapsed = nprev; 7758 if (collapsed) *collapsed = cprev; 7759 PetscFunctionReturn(PETSC_SUCCESS); 7760 } 7761 7762 /* 7763 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7764 7765 Input Parameter: 7766 . Amat - matrix 7767 - symmetrize - make the result symmetric 7768 + scale - scale with diagonal 7769 7770 Output Parameter: 7771 . a_Gmat - output scalar graph >= 0 7772 7773 */ 7774 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7775 { 7776 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7777 MPI_Comm comm; 7778 Mat Gmat; 7779 PetscBool ismpiaij, isseqaij; 7780 Mat a, b, c; 7781 MatType jtype; 7782 7783 PetscFunctionBegin; 7784 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7785 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7786 PetscCall(MatGetSize(Amat, &MM, &NN)); 7787 PetscCall(MatGetBlockSize(Amat, &bs)); 7788 nloc = (Iend - Istart) / bs; 7789 7790 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7791 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7792 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7793 7794 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7795 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7796 implementation */ 7797 if (bs > 1) { 7798 PetscCall(MatGetType(Amat, &jtype)); 7799 PetscCall(MatCreate(comm, &Gmat)); 7800 PetscCall(MatSetType(Gmat, jtype)); 7801 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7802 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7803 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7804 PetscInt *d_nnz, *o_nnz; 7805 MatScalar *aa, val, *AA; 7806 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7807 7808 if (isseqaij) { 7809 a = Amat; 7810 b = NULL; 7811 } else { 7812 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7813 a = d->A; 7814 b = d->B; 7815 } 7816 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7817 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7818 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7819 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7820 const PetscInt *cols1, *cols2; 7821 7822 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7823 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7824 nnz[brow / bs] = nc2 / bs; 7825 if (nc2 % bs) ok = 0; 7826 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7827 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7828 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7829 if (nc1 != nc2) ok = 0; 7830 else { 7831 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7832 if (cols1[jj] != cols2[jj]) ok = 0; 7833 if (cols1[jj] % bs != jj % bs) ok = 0; 7834 } 7835 } 7836 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7837 } 7838 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7839 if (!ok) { 7840 PetscCall(PetscFree2(d_nnz, o_nnz)); 7841 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7842 goto old_bs; 7843 } 7844 } 7845 } 7846 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7847 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7848 PetscCall(PetscFree2(d_nnz, o_nnz)); 7849 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7850 // diag 7851 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7852 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7853 7854 ai = aseq->i; 7855 n = ai[brow + 1] - ai[brow]; 7856 aj = aseq->j + ai[brow]; 7857 for (PetscInt k = 0; k < n; k += bs) { // block columns 7858 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7859 val = 0; 7860 if (index_size == 0) { 7861 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7862 aa = aseq->a + ai[brow + ii] + k; 7863 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7864 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7865 } 7866 } 7867 } else { // use (index,index) value if provided 7868 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7869 PetscInt ii = index[iii]; 7870 aa = aseq->a + ai[brow + ii] + k; 7871 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7872 PetscInt jj = index[jjj]; 7873 val += PetscAbs(PetscRealPart(aa[jj])); 7874 } 7875 } 7876 } 7877 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7878 AA[k / bs] = val; 7879 } 7880 grow = Istart / bs + brow / bs; 7881 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7882 } 7883 // off-diag 7884 if (ismpiaij) { 7885 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7886 const PetscScalar *vals; 7887 const PetscInt *cols, *garray = aij->garray; 7888 7889 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7890 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7891 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7892 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7893 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7894 AA[k / bs] = 0; 7895 AJ[cidx] = garray[cols[k]] / bs; 7896 } 7897 nc = ncols / bs; 7898 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7899 if (index_size == 0) { 7900 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7901 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7902 for (PetscInt k = 0; k < ncols; k += bs) { 7903 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7904 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7905 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7906 } 7907 } 7908 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7909 } 7910 } else { // use (index,index) value if provided 7911 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7912 PetscInt ii = index[iii]; 7913 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7914 for (PetscInt k = 0; k < ncols; k += bs) { 7915 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7916 PetscInt jj = index[jjj]; 7917 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7918 } 7919 } 7920 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7921 } 7922 } 7923 grow = Istart / bs + brow / bs; 7924 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7925 } 7926 } 7927 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7928 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7929 PetscCall(PetscFree2(AA, AJ)); 7930 } else { 7931 const PetscScalar *vals; 7932 const PetscInt *idx; 7933 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7934 old_bs: 7935 /* 7936 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7937 */ 7938 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7939 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7940 if (isseqaij) { 7941 PetscInt max_d_nnz; 7942 7943 /* 7944 Determine exact preallocation count for (sequential) scalar matrix 7945 */ 7946 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7947 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7948 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7949 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7950 PetscCall(PetscFree3(w0, w1, w2)); 7951 } else if (ismpiaij) { 7952 Mat Daij, Oaij; 7953 const PetscInt *garray; 7954 PetscInt max_d_nnz; 7955 7956 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7957 /* 7958 Determine exact preallocation count for diagonal block portion of scalar matrix 7959 */ 7960 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7961 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7962 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7963 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7964 PetscCall(PetscFree3(w0, w1, w2)); 7965 /* 7966 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7967 */ 7968 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7969 o_nnz[jj] = 0; 7970 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7971 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7972 o_nnz[jj] += ncols; 7973 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7974 } 7975 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7976 } 7977 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7978 /* get scalar copy (norms) of matrix */ 7979 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7980 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7981 PetscCall(PetscFree2(d_nnz, o_nnz)); 7982 for (Ii = Istart; Ii < Iend; Ii++) { 7983 PetscInt dest_row = Ii / bs; 7984 7985 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7986 for (jj = 0; jj < ncols; jj++) { 7987 PetscInt dest_col = idx[jj] / bs; 7988 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7989 7990 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7991 } 7992 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7993 } 7994 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7995 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7996 } 7997 } else { 7998 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7999 else { 8000 Gmat = Amat; 8001 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8002 } 8003 if (isseqaij) { 8004 a = Gmat; 8005 b = NULL; 8006 } else { 8007 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8008 a = d->A; 8009 b = d->B; 8010 } 8011 if (filter >= 0 || scale) { 8012 /* take absolute value of each entry */ 8013 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8014 MatInfo info; 8015 PetscScalar *avals; 8016 8017 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8018 PetscCall(MatSeqAIJGetArray(c, &avals)); 8019 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8020 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8021 } 8022 } 8023 } 8024 if (symmetrize) { 8025 PetscBool isset, issym; 8026 8027 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8028 if (!isset || !issym) { 8029 Mat matTrans; 8030 8031 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8032 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8033 PetscCall(MatDestroy(&matTrans)); 8034 } 8035 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8036 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8037 if (scale) { 8038 /* scale c for all diagonal values = 1 or -1 */ 8039 Vec diag; 8040 8041 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8042 PetscCall(MatGetDiagonal(Gmat, diag)); 8043 PetscCall(VecReciprocal(diag)); 8044 PetscCall(VecSqrtAbs(diag)); 8045 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8046 PetscCall(VecDestroy(&diag)); 8047 } 8048 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8049 if (filter >= 0) { 8050 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8051 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8052 } 8053 *a_Gmat = Gmat; 8054 PetscFunctionReturn(PETSC_SUCCESS); 8055 } 8056 8057 /* 8058 Special version for direct calls from Fortran 8059 */ 8060 8061 /* Change these macros so can be used in void function */ 8062 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8063 #undef PetscCall 8064 #define PetscCall(...) \ 8065 do { \ 8066 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8067 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8068 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8069 return; \ 8070 } \ 8071 } while (0) 8072 8073 #undef SETERRQ 8074 #define SETERRQ(comm, ierr, ...) \ 8075 do { \ 8076 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8077 return; \ 8078 } while (0) 8079 8080 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8081 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8083 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8084 #else 8085 #endif 8086 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8087 { 8088 Mat mat = *mmat; 8089 PetscInt m = *mm, n = *mn; 8090 InsertMode addv = *maddv; 8091 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8092 PetscScalar value; 8093 8094 MatCheckPreallocated(mat, 1); 8095 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8096 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8097 { 8098 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8099 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8100 PetscBool roworiented = aij->roworiented; 8101 8102 /* Some Variables required in the macro */ 8103 Mat A = aij->A; 8104 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8105 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8106 MatScalar *aa; 8107 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8108 Mat B = aij->B; 8109 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8110 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8111 MatScalar *ba; 8112 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8113 * cannot use "#if defined" inside a macro. */ 8114 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8115 8116 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8117 PetscInt nonew = a->nonew; 8118 MatScalar *ap1, *ap2; 8119 8120 PetscFunctionBegin; 8121 PetscCall(MatSeqAIJGetArray(A, &aa)); 8122 PetscCall(MatSeqAIJGetArray(B, &ba)); 8123 for (i = 0; i < m; i++) { 8124 if (im[i] < 0) continue; 8125 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8126 if (im[i] >= rstart && im[i] < rend) { 8127 row = im[i] - rstart; 8128 lastcol1 = -1; 8129 rp1 = aj + ai[row]; 8130 ap1 = aa + ai[row]; 8131 rmax1 = aimax[row]; 8132 nrow1 = ailen[row]; 8133 low1 = 0; 8134 high1 = nrow1; 8135 lastcol2 = -1; 8136 rp2 = bj + bi[row]; 8137 ap2 = ba + bi[row]; 8138 rmax2 = bimax[row]; 8139 nrow2 = bilen[row]; 8140 low2 = 0; 8141 high2 = nrow2; 8142 8143 for (j = 0; j < n; j++) { 8144 if (roworiented) value = v[i * n + j]; 8145 else value = v[i + j * m]; 8146 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8147 if (in[j] >= cstart && in[j] < cend) { 8148 col = in[j] - cstart; 8149 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8150 } else if (in[j] < 0) continue; 8151 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8152 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8153 } else { 8154 if (mat->was_assembled) { 8155 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8156 #if defined(PETSC_USE_CTABLE) 8157 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8158 col--; 8159 #else 8160 col = aij->colmap[in[j]] - 1; 8161 #endif 8162 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8163 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8164 col = in[j]; 8165 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8166 B = aij->B; 8167 b = (Mat_SeqAIJ *)B->data; 8168 bimax = b->imax; 8169 bi = b->i; 8170 bilen = b->ilen; 8171 bj = b->j; 8172 rp2 = bj + bi[row]; 8173 ap2 = ba + bi[row]; 8174 rmax2 = bimax[row]; 8175 nrow2 = bilen[row]; 8176 low2 = 0; 8177 high2 = nrow2; 8178 bm = aij->B->rmap->n; 8179 ba = b->a; 8180 inserted = PETSC_FALSE; 8181 } 8182 } else col = in[j]; 8183 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8184 } 8185 } 8186 } else if (!aij->donotstash) { 8187 if (roworiented) { 8188 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8189 } else { 8190 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8191 } 8192 } 8193 } 8194 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8195 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8196 } 8197 PetscFunctionReturnVoid(); 8198 } 8199 8200 /* Undefining these here since they were redefined from their original definition above! No 8201 * other PETSc functions should be defined past this point, as it is impossible to recover the 8202 * original definitions */ 8203 #undef PetscCall 8204 #undef SETERRQ 8205