1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 
114 115 This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator, 116 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 117 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 118 for communicators controlling multiple processes. It is recommended that you call both of 119 the above preallocation routines for simplicity. 120 121 Options Database Key: 122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 123 124 Level: beginner 125 126 Developer Note: 127 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when 128 enough exist. 129 130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 131 M*/ 132 133 /*MC 134 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 135 136 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 137 and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 138 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 139 for communicators controlling multiple processes. It is recommended that you call both of 140 the above preallocation routines for simplicity. 141 142 Options Database Key: 143 . 
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 
PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ 
*)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 288 PetscFunctionBegin; 289 PetscCall(MatGetSize(A, &m, &n)); 290 PetscCall(PetscCalloc1(n, &work)); 291 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 292 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 294 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 295 if (type == NORM_2) { 296 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 297 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 298 } else if (type == NORM_1) { 299 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 300 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 301 } else if (type == NORM_INFINITY) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), 
work[garray[b_aij->j[i]]]); 304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 307 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 310 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 311 if (type == NORM_INFINITY) { 312 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 313 } else { 314 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 315 } 316 PetscCall(PetscFree(work)); 317 if (type == NORM_2) { 318 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 319 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 320 for (i = 0; i < n; i++) reductions[i] /= m; 321 } 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 326 { 327 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 328 IS sis, gis; 329 const PetscInt *isis, *igis; 330 PetscInt n, *iis, nsis, ngis, rstart, i; 331 332 PetscFunctionBegin; 333 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 334 PetscCall(MatFindNonzeroRows(a->B, &gis)); 335 PetscCall(ISGetSize(gis, &ngis)); 336 PetscCall(ISGetSize(sis, &nsis)); 337 PetscCall(ISGetIndices(sis, &isis)); 338 PetscCall(ISGetIndices(gis, &igis)); 339 340 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 341 PetscCall(PetscArraycpy(iis, igis, ngis)); 342 
PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 343 n = ngis + nsis; 344 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 345 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 346 for (i = 0; i < n; i++) iis[i] += rstart; 347 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 348 349 PetscCall(ISRestoreIndices(sis, &isis)); 350 PetscCall(ISRestoreIndices(gis, &igis)); 351 PetscCall(ISDestroy(&sis)); 352 PetscCall(ISDestroy(&gis)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /* 357 Local utility routine that creates a mapping from the global column 358 number to the local number in the off-diagonal part of the local 359 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 360 a slightly higher hash table cost; without it it is not scalable (each processor 361 has an order N integer array but is fast to access. 362 */ 363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 364 { 365 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 366 PetscInt n = aij->B->cmap->n, i; 367 368 PetscFunctionBegin; 369 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 370 #if defined(PETSC_USE_CTABLE) 371 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 372 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 373 #else 374 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 375 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 376 #endif 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 381 do { \ 382 if (col <= lastcol1) low1 = 0; \ 383 else high1 = nrow1; \ 384 lastcol1 = col; \ 385 while (high1 - low1 > 5) { \ 386 t = (low1 + high1) / 2; \ 387 if (rp1[t] > col) high1 = t; \ 388 else low1 = t; \ 389 } \ 390 for (_i = low1; _i < high1; _i++) { \ 391 if (rp1[_i] > col) break; \ 392 if (rp1[_i] == col) { \ 393 if (addv == 
ADD_VALUES) { \ 394 ap1[_i] += value; \ 395 /* Not sure LogFlops will slow dow the code or not */ \ 396 (void)PetscLogFlops(1.0); \ 397 } else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries && row != col) { \ 402 low1 = 0; \ 403 high1 = nrow1; \ 404 goto a_noinsert; \ 405 } \ 406 if (nonew == 1) { \ 407 low1 = 0; \ 408 high1 = nrow1; \ 409 goto a_noinsert; \ 410 } \ 411 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 412 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 413 N = nrow1++ - 1; \ 414 a->nz++; \ 415 high1++; \ 416 /* shift up all the later entries in this row */ \ 417 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 418 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 419 rp1[_i] = col; \ 420 ap1[_i] = value; \ 421 A->nonzerostate++; \ 422 a_noinsert:; \ 423 ailen[row] = nrow1; \ 424 } while (0) 425 426 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 427 do { \ 428 if (col <= lastcol2) low2 = 0; \ 429 else high2 = nrow2; \ 430 lastcol2 = col; \ 431 while (high2 - low2 > 5) { \ 432 t = (low2 + high2) / 2; \ 433 if (rp2[t] > col) high2 = t; \ 434 else low2 = t; \ 435 } \ 436 for (_i = low2; _i < high2; _i++) { \ 437 if (rp2[_i] > col) break; \ 438 if (rp2[_i] == col) { \ 439 if (addv == ADD_VALUES) { \ 440 ap2[_i] += value; \ 441 (void)PetscLogFlops(1.0); \ 442 } else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) { \ 447 low2 = 0; \ 448 high2 = nrow2; \ 449 goto b_noinsert; \ 450 } \ 451 if (nonew == 1) { \ 452 low2 = 0; \ 453 high2 = nrow2; \ 454 goto b_noinsert; \ 455 } \ 456 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m-by-n logically dense block of values into an MPIAIJ matrix.

  Input Parameters:
+ mat  - the matrix
. m    - number of rows in the block
. im   - global row indices; negative entries are skipped
. n    - number of columns in the block
. in   - global column indices; negative entries are skipped
. v    - the values, read as v[i * n + j] when the matrix is row oriented and as v[i + j * m] otherwise;
         when v is NULL the value 0.0 is used for every entry
- addv - insert mode forwarded to the per-entry macros (ADD_VALUES adds, anything else overwrites)

  Notes:
  Rows in [rstart, rend) are handled locally: columns in [cstart, cend) go to the diagonal block A through
  MatSetValues_SeqAIJ_A_Private(), all other columns go to the off-diagonal block B through
  MatSetValues_SeqAIJ_B_Private(). Both macros use the many local variables declared below by name.

  Rows outside [rstart, rend) are placed in mat->stash (unless aij->donotstash is set) and mat->assembled is
  cleared; it is an error to pass such a row when mat->nooffprocentries is set.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A                 = aij->A;
  Mat_SeqAIJ *a                 = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the A (suffix 1) and B (suffix 2) macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        /* when zeros are ignored, an added zero is dropped unless it sits on the global diagonal */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block; A uses column indices local to [cstart, cend) */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative column indices are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* B currently stores local column ids, so the global id must be translated through colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a; /* NOTE(review): repeats the assignment made a few lines above; looks redundant */
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal location while B does not accept new nonzeros: log it when nonew is 1, otherwise error */
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* never assembled: B is indexed directly by global column id */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* row is not in [rstart, rend): stash the whole row of the block */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
622 */ 623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 624 { 625 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 626 Mat A = aij->A; /* diagonal part of the matrix */ 627 Mat B = aij->B; /* off-diagonal part of the matrix */ 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 629 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 630 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 631 PetscInt *ailen = a->ilen, *aj = a->j; 632 PetscInt *bilen = b->ilen, *bj = b->j; 633 PetscInt am = aij->A->rmap->n, j; 634 PetscInt diag_so_far = 0, dnz; 635 PetscInt offd_so_far = 0, onz; 636 637 PetscFunctionBegin; 638 /* Iterate over all rows of the matrix */ 639 for (j = 0; j < am; j++) { 640 dnz = onz = 0; 641 /* Iterate over all non-zero columns of the current row */ 642 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 643 /* If column is in the diagonal */ 644 if (mat_j[col] >= cstart && mat_j[col] < cend) { 645 aj[diag_so_far++] = mat_j[col] - cstart; 646 dnz++; 647 } else { /* off-diagonal entries */ 648 bj[offd_so_far++] = mat_j[col]; 649 onz++; 650 } 651 } 652 ailen[j] = dnz; 653 bilen[j] = onz; 654 } 655 PetscFunctionReturn(PETSC_SUCCESS); 656 } 657 658 /* 659 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 660 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 661 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 662 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 663 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
664 */ 665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 666 { 667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 668 Mat A = aij->A; /* diagonal part of the matrix */ 669 Mat B = aij->B; /* off-diagonal part of the matrix */ 670 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 672 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 673 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 674 PetscInt *ailen = a->ilen, *aj = a->j; 675 PetscInt *bilen = b->ilen, *bj = b->j; 676 PetscInt am = aij->A->rmap->n, j; 677 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. 
*/ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const 
PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 PetscBool cong; 849 850 PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + 
A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscMPIInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 const PetscInt idx = rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this 
row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if pattern was allowed to be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and 
xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = 
Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1105 PetscCall(MatGetSize(Amat, &M, &N)); 1106 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1107 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1108 for (i = 0; i < first; i++) notme[i] = i; 1109 for (i = last; i < M; i++) notme[i - last + first] = i; 1110 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1111 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1112 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1113 Aoff = Aoffs[0]; 1114 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1115 Boff = Boffs[0]; 1116 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1117 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1118 PetscCall(MatDestroyMatrices(1, &Boffs)); 1119 PetscCall(ISDestroy(&Me)); 1120 PetscCall(ISDestroy(&Notme)); 1121 PetscCall(PetscFree(notme)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1128 1129 PetscFunctionBegin; 1130 /* do nondiagonal part */ 1131 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1132 /* do local part */ 1133 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1134 /* add partial 
results together */ 1135 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1136 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /* 1141 This only works correctly for square matrices where the subblock A->A is the 1142 diagonal block 1143 */ 1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1150 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1151 PetscCall(MatGetDiagonal(a->A, v)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatScale(a->A, aa)); 1161 PetscCall(MatScale(a->B, aa)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1170 const PetscInt *garray = aij->garray; 1171 const PetscScalar *aa, *ba; 1172 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1173 PetscInt64 nz, hnz; 1174 PetscInt *rowlens; 1175 PetscInt *colidxs; 1176 PetscScalar *matvals; 1177 PetscMPIInt rank; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 
header[2] = N; 1193 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1194 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1195 if (rank == 0) { 1196 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 
PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 
PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" 
PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 
/*
  MatPermute_MPIAIJ - builds a new matrix whose rows and columns are those of A
  rearranged according to the index sets rowp and colp.

  Input:
    A    - the MPIAIJ matrix
    rowp - row permutation; its local indices are attached as leaves of a star
           forest over A's row layout
    colp - column permutation, used the same way over A's column layout

  Output:
    B    - the newly created and assembled permuted matrix

  Outline:
    1. Star forests built from rowp/colp are reduced to obtain, for every
       locally owned row and column, its destination index (rdest, cdest).
    2. The destinations of the ghost columns of the off-diagonal block (gcols)
       are fetched with a broadcast (gcdest).
    3. For every local row the entries are classified as diagonal or
       off-diagonal in the permuted matrix by comparing the owner of the
       destination row with the owner of the destination column; the counts
       are sent through the row star forest to serve as preallocation.
    4. The permuted matrix is created with MatCreateAIJ(), filled row by row
       with MatSetValues() and assembled.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* never assigned in this function, so the ISDestroy() guarded by it below is not executed */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is shared scratch for both the row pass (m entries) and the column pass (n entries) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  /* aA/aB are the local diagonal and off-diagonal blocks; gcols is handed back alongside them and is treated below as the global indices of aB's columns */
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* dnnz/onnz: per local row, number of entries landing in the diagonal/off-diagonal part of the permuted matrix */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    /* entries of the diagonal block: local column -> destination via cdest */
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    /* entries of the off-diagonal block: ghost column -> destination via gcdest */
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* move the counts through the row star forest into tdnnz/tonnz, which are used as the preallocation of Aperm */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      /* translate at most m column indices (the capacity of the reused buffer), then insert that batch */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is always NULL here, and the guarded call destroys colp rather than parcolp -- looks like a leftover; confirm before relying on it */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 
1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if (idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 1804 PetscFunctionBegin; 1805 if (aij->size == 1) { 1806 PetscCall(MatNorm(aij->A, type, norm)); 1807 } else { 1808 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1809 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1810 if (type == NORM_FROBENIUS) { 1811 v = amata; 1812 for (i = 0; i < amat->nz; i++) { 1813 sum += PetscRealPart(PetscConj(*v) * (*v)); 1814 v++; 1815 } 1816 v = bmata; 1817 for (i = 0; i < bmat->nz; i++) { 1818 sum += PetscRealPart(PetscConj(*v) * (*v)); 1819 v++; 1820 } 1821 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1822 *norm = PetscSqrtReal(*norm); 1823 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1824 } else if (type == NORM_1) { /* max column norm */ 1825 PetscReal *tmp, *tmp2; 1826 PetscInt *jj, *garray = aij->garray; 1827 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1828 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1829 *norm = 0.0; 1830 v = amata; 1831 jj = amat->j; 1832 for (j = 0; j < amat->nz; j++) { 1833 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1834 v++; 1835 } 1836 v = bmata; 1837 jj = bmat->j; 1838 for (j = 0; j < bmat->nz; j++) { 1839 
tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      /* sum the per-column partial sums over all ranks, then take the largest column */
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        /* row sum = diagonal-block part + off-diagonal-block part */
        v = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatTranspose_MPIAIJ - Builds the transpose of A.

  When reuse is MAT_INITIAL_MATRIX, or *matout is A itself, a new matrix B of A's
  type is created with swapped local/global sizes and preallocated from column
  counts of A's two blocks.  Otherwise *matout is filled in place.  The diagonal
  block is transposed directly; the off-diagonal block is inserted entry by entry
  through MatSetValues().  For MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX the result is
  returned in *matout; for any other reuse value B is merged into A's header.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt *ai, *aj, *bi, *bj, *B_diag_i;
  Mat B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode *oloc; /* allocated and freed below but not otherwise used in this function */
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has A's column layout as rows and A's row layout as columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b = (Mat_MPIAIJ *)B->data;
  A_diag = a->A;
  B_diag = &b->A;
  sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate every compressed off-diagonal column index to its global column */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* row i of A's off-diagonal block becomes (part of) global column `row` of B */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat a = aij->A, b = aij->B;
  PetscInt s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/ 1980 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1981 } 1982 if (ll) { 1983 PetscCall(VecGetLocalSize(ll, &s1)); 1984 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1985 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1986 } 1987 /* scale the diagonal block */ 1988 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1989 1990 if (rr) { 1991 /* Do a scatter end and then right scale the off-diagonal block */ 1992 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1993 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1994 } 1995 PetscFunctionReturn(PETSC_SUCCESS); 1996 } 1997 1998 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1999 { 2000 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2001 2002 PetscFunctionBegin; 2003 PetscCall(MatSetUnfactored(a->A)); 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2008 { 2009 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2010 Mat a, b, c, d; 2011 PetscBool flg; 2012 2013 PetscFunctionBegin; 2014 a = matA->A; 2015 b = matA->B; 2016 c = matB->A; 2017 d = matB->B; 2018 2019 PetscCall(MatEqual(a, c, &flg)); 2020 if (flg) PetscCall(MatEqual(b, d, &flg)); 2021 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2022 PetscFunctionReturn(PETSC_SUCCESS); 2023 } 2024 2025 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2026 { 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2028 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2029 2030 PetscFunctionBegin; 2031 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2032 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2033 /* because of the column compression in the off-processor part of the matrix a->B, 2034 the number of columns in a->B and b->B may be different, hence we cannot call 2035 the MatCopy() directly on the two parts. If need be, we can provide a more 2036 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2037 then copying the submatrices */ 2038 PetscCall(MatCopy_Basic(A, B, str)); 2039 } else { 2040 PetscCall(MatCopy(a->A, b->A, str)); 2041 PetscCall(MatCopy(a->B, b->B, str)); 2042 } 2043 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 /* 2048 Computes the number of nonzeros per row needed for preallocation when X and Y 2049 have different nonzero structure. 2050 */ 2051 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2052 { 2053 PetscInt i, j, k, nzx, nzy; 2054 2055 PetscFunctionBegin; 2056 /* Set the number of nonzeros in the new matrix */ 2057 for (i = 0; i < m; i++) { 2058 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2059 nzx = xi[i + 1] - xi[i]; 2060 nzy = yi[i + 1] - yi[i]; 2061 nnz[i] = 0; 2062 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2063 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2064 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2065 nnz[i]++; 2066 } 2067 for (; k < nzy; k++) nnz[i]++; 2068 } 2069 PetscFunctionReturn(PETSC_SUCCESS); 2070 } 2071 2072 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2073 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2074 { 
2075 PetscInt m = Y->rmap->N; 2076 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2077 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2078 2079 PetscFunctionBegin; 2080 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2081 PetscFunctionReturn(PETSC_SUCCESS); 2082 } 2083 2084 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2085 { 2086 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 if (str == SAME_NONZERO_PATTERN) { 2090 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2091 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2092 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2093 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2094 } else { 2095 Mat B; 2096 PetscInt *nnz_d, *nnz_o; 2097 2098 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2099 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2100 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2101 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2102 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2103 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2104 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2105 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2106 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2107 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2108 PetscCall(MatHeaderMerge(Y, &B)); 2109 PetscCall(PetscFree(nnz_d)); 2110 PetscCall(PetscFree(nnz_o)); 2111 } 2112 PetscFunctionReturn(PETSC_SUCCESS); 2113 } 2114 2115 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2116 2117 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2118 { 2119 PetscFunctionBegin; 2120 if (PetscDefined(USE_COMPLEX)) { 2121 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2122 2123 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2124 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2125 } 2126 
PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2130 { 2131 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2132 2133 PetscFunctionBegin; 2134 PetscCall(MatRealPart(a->A)); 2135 PetscCall(MatRealPart(a->B)); 2136 PetscFunctionReturn(PETSC_SUCCESS); 2137 } 2138 2139 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2140 { 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2142 2143 PetscFunctionBegin; 2144 PetscCall(MatImaginaryPart(a->A)); 2145 PetscCall(MatImaginaryPart(a->B)); 2146 PetscFunctionReturn(PETSC_SUCCESS); 2147 } 2148 2149 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2150 { 2151 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2152 PetscInt i, *idxb = NULL, m = A->rmap->n; 2153 PetscScalar *va, *vv; 2154 Vec vB, vA; 2155 const PetscScalar *vb; 2156 2157 PetscFunctionBegin; 2158 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2159 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2160 2161 PetscCall(VecGetArrayWrite(vA, &va)); 2162 if (idx) { 2163 for (i = 0; i < m; i++) { 2164 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2165 } 2166 } 2167 2168 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2169 PetscCall(PetscMalloc1(m, &idxb)); 2170 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2171 2172 PetscCall(VecGetArrayWrite(v, &vv)); 2173 PetscCall(VecGetArrayRead(vB, &vb)); 2174 for (i = 0; i < m; i++) { 2175 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2176 vv[i] = vb[i]; 2177 if (idx) idx[i] = a->garray[idxb[i]]; 2178 } else { 2179 vv[i] = va[i]; 2180 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2181 } 2182 } 2183 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2184 PetscCall(VecRestoreArrayWrite(vA, &va)); 2185 PetscCall(VecRestoreArrayRead(vB, &vb)); 2186 PetscCall(PetscFree(idxb)); 2187 PetscCall(VecDestroy(&vA)); 2188 PetscCall(VecDestroy(&vB)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 
2192 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2193 { 2194 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2195 PetscInt m = A->rmap->n; 2196 Vec vB, vA; 2197 2198 PetscFunctionBegin; 2199 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2200 PetscCall(MatGetRowSumAbs(a->A, vA)); 2201 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2202 PetscCall(MatGetRowSumAbs(a->B, vB)); 2203 PetscCall(VecAXPY(vA, 1.0, vB)); 2204 PetscCall(VecDestroy(&vB)); 2205 PetscCall(VecCopy(vA, v)); 2206 PetscCall(VecDestroy(&vA)); 2207 PetscFunctionReturn(PETSC_SUCCESS); 2208 } 2209 2210 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2211 { 2212 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2213 PetscInt m = A->rmap->n, n = A->cmap->n; 2214 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2215 PetscInt *cmap = mat->garray; 2216 PetscInt *diagIdx, *offdiagIdx; 2217 Vec diagV, offdiagV; 2218 PetscScalar *a, *diagA, *offdiagA; 2219 const PetscScalar *ba, *bav; 2220 PetscInt r, j, col, ncols, *bi, *bj; 2221 Mat B = mat->B; 2222 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2223 2224 PetscFunctionBegin; 2225 /* When a process holds entire A and other processes have no entry */ 2226 if (A->cmap->N == n) { 2227 PetscCall(VecGetArrayWrite(v, &diagA)); 2228 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2229 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2230 PetscCall(VecDestroy(&diagV)); 2231 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2232 PetscFunctionReturn(PETSC_SUCCESS); 2233 } else if (n == 0) { 2234 if (m) { 2235 PetscCall(VecGetArrayWrite(v, &a)); 2236 for (r = 0; r < m; r++) { 2237 a[r] = 0.0; 2238 if (idx) idx[r] = -1; 2239 } 2240 PetscCall(VecRestoreArrayWrite(v, &a)); 2241 } 2242 PetscFunctionReturn(PETSC_SUCCESS); 2243 } 2244 2245 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2246 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2248 
PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2249 2250 /* Get offdiagIdx[] for implicit 0.0 */ 2251 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2252 ba = bav; 2253 bi = b->i; 2254 bj = b->j; 2255 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2256 for (r = 0; r < m; r++) { 2257 ncols = bi[r + 1] - bi[r]; 2258 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2259 offdiagA[r] = *ba; 2260 offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j = 0; j < ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols - 1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j = 0; j < ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2290 offdiagA[r] = *ba; 2291 offdiagIdx[r] = cmap[*bj]; 2292 } 2293 ba++; 2294 bj++; 2295 } 2296 } 2297 2298 PetscCall(VecGetArrayWrite(v, &a)); 2299 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2300 for (r = 0; r < m; ++r) { 2301 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2302 a[r] = diagA[r]; 2303 if (idx) idx[r] = cstart + diagIdx[r]; 2304 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2305 a[r] = diagA[r]; 2306 if (idx) { 2307 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2308 idx[r] = cstart + diagIdx[r]; 2309 } else idx[r] = 
offdiagIdx[r]; 2310 } 2311 } else { 2312 a[r] = offdiagA[r]; 2313 if (idx) idx[r] = offdiagIdx[r]; 2314 } 2315 } 2316 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2317 PetscCall(VecRestoreArrayWrite(v, &a)); 2318 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2319 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2320 PetscCall(VecDestroy(&diagV)); 2321 PetscCall(VecDestroy(&offdiagV)); 2322 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2323 PetscFunctionReturn(PETSC_SUCCESS); 2324 } 2325 2326 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2327 { 2328 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2329 PetscInt m = A->rmap->n, n = A->cmap->n; 2330 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2331 PetscInt *cmap = mat->garray; 2332 PetscInt *diagIdx, *offdiagIdx; 2333 Vec diagV, offdiagV; 2334 PetscScalar *a, *diagA, *offdiagA; 2335 const PetscScalar *ba, *bav; 2336 PetscInt r, j, col, ncols, *bi, *bj; 2337 Mat B = mat->B; 2338 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2339 2340 PetscFunctionBegin; 2341 /* When a process holds entire A and other processes have no entry */ 2342 if (A->cmap->N == n) { 2343 PetscCall(VecGetArrayWrite(v, &diagA)); 2344 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2345 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2346 PetscCall(VecDestroy(&diagV)); 2347 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2348 PetscFunctionReturn(PETSC_SUCCESS); 2349 } else if (n == 0) { 2350 if (m) { 2351 PetscCall(VecGetArrayWrite(v, &a)); 2352 for (r = 0; r < m; r++) { 2353 a[r] = PETSC_MAX_REAL; 2354 if (idx) idx[r] = -1; 2355 } 2356 PetscCall(VecRestoreArrayWrite(v, &a)); 2357 } 2358 PetscFunctionReturn(PETSC_SUCCESS); 2359 } 2360 2361 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2362 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2363 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2364 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2365 2366 /* Get 
offdiagIdx[] for implicit 0.0 */ 2367 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2368 ba = bav; 2369 bi = b->i; 2370 bj = b->j; 2371 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2372 for (r = 0; r < m; r++) { 2373 ncols = bi[r + 1] - bi[r]; 2374 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2375 offdiagA[r] = *ba; 2376 offdiagIdx[r] = cmap[0]; 2377 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2378 offdiagA[r] = 0.0; 2379 2380 /* Find first hole in the cmap */ 2381 for (j = 0; j < ncols; j++) { 2382 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2383 if (col > j && j < cstart) { 2384 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2385 break; 2386 } else if (col > j + n && j >= cstart) { 2387 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2388 break; 2389 } 2390 } 2391 if (j == ncols && ncols < A->cmap->N - n) { 2392 /* a hole is outside compressed Bcols */ 2393 if (ncols == 0) { 2394 if (cstart) { 2395 offdiagIdx[r] = 0; 2396 } else offdiagIdx[r] = cend; 2397 } else { /* ncols > 0 */ 2398 offdiagIdx[r] = cmap[ncols - 1] + 1; 2399 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2400 } 2401 } 2402 } 2403 2404 for (j = 0; j < ncols; j++) { 2405 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2406 offdiagA[r] = *ba; 2407 offdiagIdx[r] = cmap[*bj]; 2408 } 2409 ba++; 2410 bj++; 2411 } 2412 } 2413 2414 PetscCall(VecGetArrayWrite(v, &a)); 2415 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2416 for (r = 0; r < m; ++r) { 2417 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2418 a[r] = diagA[r]; 2419 if (idx) idx[r] = cstart + diagIdx[r]; 2420 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2421 a[r] = diagA[r]; 2422 if (idx) { 2423 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2424 idx[r] = cstart + diagIdx[r]; 2425 } else idx[r] = offdiagIdx[r]; 2426 } 2427 } else { 2428 a[r] = offdiagA[r]; 2429 if (idx) idx[r] = 
offdiagIdx[r]; 2430 } 2431 } 2432 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2433 PetscCall(VecRestoreArrayWrite(v, &a)); 2434 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2435 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2436 PetscCall(VecDestroy(&diagV)); 2437 PetscCall(VecDestroy(&offdiagV)); 2438 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2439 PetscFunctionReturn(PETSC_SUCCESS); 2440 } 2441 2442 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2443 { 2444 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2445 PetscInt m = A->rmap->n, n = A->cmap->n; 2446 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2447 PetscInt *cmap = mat->garray; 2448 PetscInt *diagIdx, *offdiagIdx; 2449 Vec diagV, offdiagV; 2450 PetscScalar *a, *diagA, *offdiagA; 2451 const PetscScalar *ba, *bav; 2452 PetscInt r, j, col, ncols, *bi, *bj; 2453 Mat B = mat->B; 2454 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2455 2456 PetscFunctionBegin; 2457 /* When a process holds entire A and other processes have no entry */ 2458 if (A->cmap->N == n) { 2459 PetscCall(VecGetArrayWrite(v, &diagA)); 2460 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2461 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2462 PetscCall(VecDestroy(&diagV)); 2463 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2464 PetscFunctionReturn(PETSC_SUCCESS); 2465 } else if (n == 0) { 2466 if (m) { 2467 PetscCall(VecGetArrayWrite(v, &a)); 2468 for (r = 0; r < m; r++) { 2469 a[r] = PETSC_MIN_REAL; 2470 if (idx) idx[r] = -1; 2471 } 2472 PetscCall(VecRestoreArrayWrite(v, &a)); 2473 } 2474 PetscFunctionReturn(PETSC_SUCCESS); 2475 } 2476 2477 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2478 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2479 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2480 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2481 2482 /* Get offdiagIdx[] for implicit 0.0 */ 2483 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2484 
ba = bav; 2485 bi = b->i; 2486 bj = b->j; 2487 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2488 for (r = 0; r < m; r++) { 2489 ncols = bi[r + 1] - bi[r]; 2490 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2491 offdiagA[r] = *ba; 2492 offdiagIdx[r] = cmap[0]; 2493 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2494 offdiagA[r] = 0.0; 2495 2496 /* Find first hole in the cmap */ 2497 for (j = 0; j < ncols; j++) { 2498 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2499 if (col > j && j < cstart) { 2500 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2501 break; 2502 } else if (col > j + n && j >= cstart) { 2503 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2504 break; 2505 } 2506 } 2507 if (j == ncols && ncols < A->cmap->N - n) { 2508 /* a hole is outside compressed Bcols */ 2509 if (ncols == 0) { 2510 if (cstart) { 2511 offdiagIdx[r] = 0; 2512 } else offdiagIdx[r] = cend; 2513 } else { /* ncols > 0 */ 2514 offdiagIdx[r] = cmap[ncols - 1] + 1; 2515 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2516 } 2517 } 2518 } 2519 2520 for (j = 0; j < ncols; j++) { 2521 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2522 offdiagA[r] = *ba; 2523 offdiagIdx[r] = cmap[*bj]; 2524 } 2525 ba++; 2526 bj++; 2527 } 2528 } 2529 2530 PetscCall(VecGetArrayWrite(v, &a)); 2531 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2532 for (r = 0; r < m; ++r) { 2533 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2534 a[r] = diagA[r]; 2535 if (idx) idx[r] = cstart + diagIdx[r]; 2536 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2537 a[r] = diagA[r]; 2538 if (idx) { 2539 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2540 idx[r] = cstart + diagIdx[r]; 2541 } else idx[r] = offdiagIdx[r]; 2542 } 2543 } else { 2544 a[r] = offdiagA[r]; 2545 if (idx) idx[r] = offdiagIdx[r]; 2546 } 2547 } 2548 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2549 
PetscCall(VecRestoreArrayWrite(v, &a)); 2550 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2551 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2552 PetscCall(VecDestroy(&diagV)); 2553 PetscCall(VecDestroy(&offdiagV)); 2554 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2555 PetscFunctionReturn(PETSC_SUCCESS); 2556 } 2557 2558 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2559 { 2560 Mat *dummy; 2561 2562 PetscFunctionBegin; 2563 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2564 *newmat = *dummy; 2565 PetscCall(PetscFree(dummy)); 2566 PetscFunctionReturn(PETSC_SUCCESS); 2567 } 2568 2569 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2570 { 2571 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2572 2573 PetscFunctionBegin; 2574 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2575 A->factorerrortype = a->A->factorerrortype; 2576 PetscFunctionReturn(PETSC_SUCCESS); 2577 } 2578 2579 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2580 { 2581 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2582 2583 PetscFunctionBegin; 2584 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2585 PetscCall(MatSetRandom(aij->A, rctx)); 2586 if (x->assembled) { 2587 PetscCall(MatSetRandom(aij->B, rctx)); 2588 } else { 2589 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2590 } 2591 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2592 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2593 PetscFunctionReturn(PETSC_SUCCESS); 2594 } 2595 2596 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2597 { 2598 PetscFunctionBegin; 2599 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2600 else 
A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 /*@ 2605 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2606 2607 Not Collective 2608 2609 Input Parameter: 2610 . A - the matrix 2611 2612 Output Parameter: 2613 . nz - the number of nonzeros 2614 2615 Level: advanced 2616 2617 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2618 @*/ 2619 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2620 { 2621 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2622 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2623 PetscBool isaij; 2624 2625 PetscFunctionBegin; 2626 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2627 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2628 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 2632 /*@ 2633 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2634 2635 Collective 2636 2637 Input Parameters: 2638 + A - the matrix 2639 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2640 2641 Level: advanced 2642 2643 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2644 @*/ 2645 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2646 { 2647 PetscFunctionBegin; 2648 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2653 { 2654 PetscBool sc = PETSC_FALSE, flg; 2655 2656 PetscFunctionBegin; 2657 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2658 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2659 
PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2660 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2661 PetscOptionsHeadEnd(); 2662 PetscFunctionReturn(PETSC_SUCCESS); 2663 } 2664 2665 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2666 { 2667 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2668 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2669 2670 PetscFunctionBegin; 2671 if (!Y->preallocated) { 2672 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2673 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2674 PetscInt nonew = aij->nonew; 2675 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2676 aij->nonew = nonew; 2677 } 2678 PetscCall(MatShift_Basic(Y, a)); 2679 PetscFunctionReturn(PETSC_SUCCESS); 2680 } 2681 2682 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2683 { 2684 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2685 2686 PetscFunctionBegin; 2687 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2688 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2689 if (d) { 2690 PetscInt rstart; 2691 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2692 *d += rstart; 2693 } 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2698 { 2699 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2700 2701 PetscFunctionBegin; 2702 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2703 PetscFunctionReturn(PETSC_SUCCESS); 2704 } 2705 2706 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2707 { 2708 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2709 2710 PetscFunctionBegin; 2711 
PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2712 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2713 PetscFunctionReturn(PETSC_SUCCESS); 2714 } 2715 2716 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2717 MatGetRow_MPIAIJ, 2718 MatRestoreRow_MPIAIJ, 2719 MatMult_MPIAIJ, 2720 /* 4*/ MatMultAdd_MPIAIJ, 2721 MatMultTranspose_MPIAIJ, 2722 MatMultTransposeAdd_MPIAIJ, 2723 NULL, 2724 NULL, 2725 NULL, 2726 /*10*/ NULL, 2727 NULL, 2728 NULL, 2729 MatSOR_MPIAIJ, 2730 MatTranspose_MPIAIJ, 2731 /*15*/ MatGetInfo_MPIAIJ, 2732 MatEqual_MPIAIJ, 2733 MatGetDiagonal_MPIAIJ, 2734 MatDiagonalScale_MPIAIJ, 2735 MatNorm_MPIAIJ, 2736 /*20*/ MatAssemblyBegin_MPIAIJ, 2737 MatAssemblyEnd_MPIAIJ, 2738 MatSetOption_MPIAIJ, 2739 MatZeroEntries_MPIAIJ, 2740 /*24*/ MatZeroRows_MPIAIJ, 2741 NULL, 2742 NULL, 2743 NULL, 2744 NULL, 2745 /*29*/ MatSetUp_MPI_Hash, 2746 NULL, 2747 NULL, 2748 MatGetDiagonalBlock_MPIAIJ, 2749 NULL, 2750 /*34*/ MatDuplicate_MPIAIJ, 2751 NULL, 2752 NULL, 2753 NULL, 2754 NULL, 2755 /*39*/ MatAXPY_MPIAIJ, 2756 MatCreateSubMatrices_MPIAIJ, 2757 MatIncreaseOverlap_MPIAIJ, 2758 MatGetValues_MPIAIJ, 2759 MatCopy_MPIAIJ, 2760 /*44*/ MatGetRowMax_MPIAIJ, 2761 MatScale_MPIAIJ, 2762 MatShift_MPIAIJ, 2763 MatDiagonalSet_MPIAIJ, 2764 MatZeroRowsColumns_MPIAIJ, 2765 /*49*/ MatSetRandom_MPIAIJ, 2766 MatGetRowIJ_MPIAIJ, 2767 MatRestoreRowIJ_MPIAIJ, 2768 NULL, 2769 NULL, 2770 /*54*/ MatFDColoringCreate_MPIXAIJ, 2771 NULL, 2772 MatSetUnfactored_MPIAIJ, 2773 MatPermute_MPIAIJ, 2774 NULL, 2775 /*59*/ MatCreateSubMatrix_MPIAIJ, 2776 MatDestroy_MPIAIJ, 2777 MatView_MPIAIJ, 2778 NULL, 2779 NULL, 2780 /*64*/ NULL, 2781 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2782 NULL, 2783 NULL, 2784 NULL, 2785 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2786 MatGetRowMinAbs_MPIAIJ, 2787 NULL, 2788 NULL, 2789 NULL, 2790 NULL, 2791 /*75*/ MatFDColoringApply_AIJ, 2792 MatSetFromOptions_MPIAIJ, 2793 NULL, 2794 
NULL, 2795 MatFindZeroDiagonals_MPIAIJ, 2796 /*80*/ NULL, 2797 NULL, 2798 NULL, 2799 /*83*/ MatLoad_MPIAIJ, 2800 NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 /*89*/ NULL, 2806 NULL, 2807 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2808 NULL, 2809 NULL, 2810 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2811 NULL, 2812 NULL, 2813 NULL, 2814 MatBindToCPU_MPIAIJ, 2815 /*99*/ MatProductSetFromOptions_MPIAIJ, 2816 NULL, 2817 NULL, 2818 MatConjugate_MPIAIJ, 2819 NULL, 2820 /*104*/ MatSetValuesRow_MPIAIJ, 2821 MatRealPart_MPIAIJ, 2822 MatImaginaryPart_MPIAIJ, 2823 NULL, 2824 NULL, 2825 /*109*/ NULL, 2826 NULL, 2827 MatGetRowMin_MPIAIJ, 2828 NULL, 2829 MatMissingDiagonal_MPIAIJ, 2830 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2831 NULL, 2832 MatGetGhosts_MPIAIJ, 2833 NULL, 2834 NULL, 2835 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2836 NULL, 2837 NULL, 2838 NULL, 2839 MatGetMultiProcBlock_MPIAIJ, 2840 /*124*/ MatFindNonzeroRows_MPIAIJ, 2841 MatGetColumnReductions_MPIAIJ, 2842 MatInvertBlockDiagonal_MPIAIJ, 2843 MatInvertVariableBlockDiagonal_MPIAIJ, 2844 MatCreateSubMatricesMPI_MPIAIJ, 2845 /*129*/ NULL, 2846 NULL, 2847 NULL, 2848 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2849 NULL, 2850 /*134*/ NULL, 2851 NULL, 2852 NULL, 2853 NULL, 2854 NULL, 2855 /*139*/ MatSetBlockSizes_MPIAIJ, 2856 NULL, 2857 NULL, 2858 MatFDColoringSetUp_MPIXAIJ, 2859 MatFindOffBlockDiagonalEntries_MPIAIJ, 2860 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2861 /*145*/ NULL, 2862 NULL, 2863 NULL, 2864 MatCreateGraph_Simple_AIJ, 2865 NULL, 2866 /*150*/ NULL, 2867 MatEliminateZeros_MPIAIJ, 2868 MatGetRowSumAbs_MPIAIJ}; 2869 2870 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2871 { 2872 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2873 2874 PetscFunctionBegin; 2875 PetscCall(MatStoreValues(aij->A)); 2876 PetscCall(MatStoreValues(aij->B)); 2877 PetscFunctionReturn(PETSC_SUCCESS); 2878 } 2879 2880 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2881 { 2882 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2883 2884 
PetscFunctionBegin; 2885 PetscCall(MatRetrieveValues(aij->A)); 2886 PetscCall(MatRetrieveValues(aij->B)); 2887 PetscFunctionReturn(PETSC_SUCCESS); 2888 } 2889 2890 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2891 { 2892 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2893 PetscMPIInt size; 2894 2895 PetscFunctionBegin; 2896 if (B->hash_active) { 2897 B->ops[0] = b->cops; 2898 B->hash_active = PETSC_FALSE; 2899 } 2900 PetscCall(PetscLayoutSetUp(B->rmap)); 2901 PetscCall(PetscLayoutSetUp(B->cmap)); 2902 2903 #if defined(PETSC_USE_CTABLE) 2904 PetscCall(PetscHMapIDestroy(&b->colmap)); 2905 #else 2906 PetscCall(PetscFree(b->colmap)); 2907 #endif 2908 PetscCall(PetscFree(b->garray)); 2909 PetscCall(VecDestroy(&b->lvec)); 2910 PetscCall(VecScatterDestroy(&b->Mvctx)); 2911 2912 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2913 2914 MatSeqXAIJGetOptions_Private(b->B); 2915 PetscCall(MatDestroy(&b->B)); 2916 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2917 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2918 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2919 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2920 MatSeqXAIJRestoreOptions_Private(b->B); 2921 2922 MatSeqXAIJGetOptions_Private(b->A); 2923 PetscCall(MatDestroy(&b->A)); 2924 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2925 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2926 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2927 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2928 MatSeqXAIJRestoreOptions_Private(b->A); 2929 2930 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2931 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2932 B->preallocated = PETSC_TRUE; 2933 B->was_assembled = PETSC_FALSE; 2934 B->assembled = PETSC_FALSE; 2935 PetscFunctionReturn(PETSC_SUCCESS); 2936 } 2937 2938 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2939 { 2940 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2941 2942 PetscFunctionBegin; 2943 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2944 PetscCall(PetscLayoutSetUp(B->rmap)); 2945 PetscCall(PetscLayoutSetUp(B->cmap)); 2946 2947 #if defined(PETSC_USE_CTABLE) 2948 PetscCall(PetscHMapIDestroy(&b->colmap)); 2949 #else 2950 PetscCall(PetscFree(b->colmap)); 2951 #endif 2952 PetscCall(PetscFree(b->garray)); 2953 PetscCall(VecDestroy(&b->lvec)); 2954 PetscCall(VecScatterDestroy(&b->Mvctx)); 2955 2956 PetscCall(MatResetPreallocation(b->A)); 2957 PetscCall(MatResetPreallocation(b->B)); 2958 B->preallocated = PETSC_TRUE; 2959 B->was_assembled = PETSC_FALSE; 2960 B->assembled = PETSC_FALSE; 2961 PetscFunctionReturn(PETSC_SUCCESS); 2962 } 2963 2964 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2965 { 2966 Mat mat; 2967 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2968 2969 PetscFunctionBegin; 2970 *newmat = NULL; 2971 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2972 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, 
matin->cmap->N)); 2973 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2974 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2975 a = (Mat_MPIAIJ *)mat->data; 2976 2977 mat->factortype = matin->factortype; 2978 mat->assembled = matin->assembled; 2979 mat->insertmode = NOT_SET_VALUES; 2980 2981 a->size = oldmat->size; 2982 a->rank = oldmat->rank; 2983 a->donotstash = oldmat->donotstash; 2984 a->roworiented = oldmat->roworiented; 2985 a->rowindices = NULL; 2986 a->rowvalues = NULL; 2987 a->getrowactive = PETSC_FALSE; 2988 2989 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2990 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2991 if (matin->hash_active) { 2992 PetscCall(MatSetUp(mat)); 2993 } else { 2994 mat->preallocated = matin->preallocated; 2995 if (oldmat->colmap) { 2996 #if defined(PETSC_USE_CTABLE) 2997 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2998 #else 2999 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3000 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3001 #endif 3002 } else a->colmap = NULL; 3003 if (oldmat->garray) { 3004 PetscInt len; 3005 len = oldmat->B->cmap->n; 3006 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3007 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3008 } else a->garray = NULL; 3009 3010 /* It may happen MatDuplicate is called with a non-assembled matrix 3011 In fact, MatDuplicate only requires the matrix to be preallocated 3012 This may happen inside a DMCreateMatrix_Shell */ 3013 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3014 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3015 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3016 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3017 } 3018 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3019 *newmat = mat; 3020 PetscFunctionReturn(PETSC_SUCCESS); 3021 } 3022 3023 
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3024 { 3025 PetscBool isbinary, ishdf5; 3026 3027 PetscFunctionBegin; 3028 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3029 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3030 /* force binary viewer to load .info file if it has not yet done so */ 3031 PetscCall(PetscViewerSetUp(viewer)); 3032 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3033 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3034 if (isbinary) { 3035 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3036 } else if (ishdf5) { 3037 #if defined(PETSC_HAVE_HDF5) 3038 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3039 #else 3040 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3041 #endif 3042 } else { 3043 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3044 } 3045 PetscFunctionReturn(PETSC_SUCCESS); 3046 } 3047 3048 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3049 { 3050 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3051 PetscInt *rowidxs, *colidxs; 3052 PetscScalar *matvals; 3053 3054 PetscFunctionBegin; 3055 PetscCall(PetscViewerSetUp(viewer)); 3056 3057 /* read in matrix header */ 3058 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3059 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3060 M = header[1]; 3061 N = header[2]; 3062 nz = header[3]; 3063 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3064 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, 
"Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3065 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3066 3067 /* set block sizes from the viewer's .info file */ 3068 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3069 /* set global sizes if not set already */ 3070 if (mat->rmap->N < 0) mat->rmap->N = M; 3071 if (mat->cmap->N < 0) mat->cmap->N = N; 3072 PetscCall(PetscLayoutSetUp(mat->rmap)); 3073 PetscCall(PetscLayoutSetUp(mat->cmap)); 3074 3075 /* check if the matrix sizes are correct */ 3076 PetscCall(MatGetSize(mat, &rows, &cols)); 3077 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3078 3079 /* read in row lengths and build row indices */ 3080 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3081 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3082 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3083 rowidxs[0] = 0; 3084 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3085 if (nz != PETSC_MAX_INT) { 3086 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3087 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3088 } 3089 3090 /* read in column indices and matrix values */ 3091 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3092 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3093 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3094 /* store matrix indices and values */ 3095 
PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3096 PetscCall(PetscFree(rowidxs)); 3097 PetscCall(PetscFree2(colidxs, matvals)); 3098 PetscFunctionReturn(PETSC_SUCCESS); 3099 } 3100 3101 /* Not scalable because of ISAllGather() unless getting all columns. */ 3102 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3103 { 3104 IS iscol_local; 3105 PetscBool isstride; 3106 PetscMPIInt lisstride = 0, gisstride; 3107 3108 PetscFunctionBegin; 3109 /* check if we are grabbing all columns*/ 3110 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3111 3112 if (isstride) { 3113 PetscInt start, len, mstart, mlen; 3114 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3115 PetscCall(ISGetLocalSize(iscol, &len)); 3116 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3117 if (mstart == start && mlen - mstart == len) lisstride = 1; 3118 } 3119 3120 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3121 if (gisstride) { 3122 PetscInt N; 3123 PetscCall(MatGetSize(mat, NULL, &N)); 3124 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3125 PetscCall(ISSetIdentity(iscol_local)); 3126 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3127 } else { 3128 PetscInt cbs; 3129 PetscCall(ISGetBlockSize(iscol, &cbs)); 3130 PetscCall(ISAllGather(iscol, &iscol_local)); 3131 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3132 } 3133 3134 *isseq = iscol_local; 3135 PetscFunctionReturn(PETSC_SUCCESS); 3136 } 3137 3138 /* 3139 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3140 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3141 3142 Input Parameters: 3143 + mat - matrix 3144 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3145 i.e., mat->rstart <= isrow[i] < mat->rend 3146 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3147 i.e., mat->cstart <= iscol[i] < mat->cend 3148 3149 Output Parameters: 3150 + isrow_d - sequential row index set for retrieving mat->A 3151 . iscol_d - sequential column index set for retrieving mat->A 3152 . iscol_o - sequential column index set for retrieving mat->B 3153 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3154 */ 3155 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3156 { 3157 Vec x, cmap; 3158 const PetscInt *is_idx; 3159 PetscScalar *xarray, *cmaparray; 3160 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3161 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3162 Mat B = a->B; 3163 Vec lvec = a->lvec, lcmap; 3164 PetscInt i, cstart, cend, Bn = B->cmap->N; 3165 MPI_Comm comm; 3166 VecScatter Mvctx = a->Mvctx; 3167 3168 PetscFunctionBegin; 3169 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3170 PetscCall(ISGetLocalSize(iscol, &ncols)); 3171 3172 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3173 PetscCall(MatCreateVecs(mat, &x, NULL)); 3174 PetscCall(VecSet(x, -1.0)); 3175 PetscCall(VecDuplicate(x, &cmap)); 3176 PetscCall(VecSet(cmap, -1.0)); 3177 3178 /* Get start indices */ 3179 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3180 isstart -= ncols; 3181 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3182 3183 PetscCall(ISGetIndices(iscol, &is_idx)); 3184 PetscCall(VecGetArray(x, &xarray)); 3185 PetscCall(VecGetArray(cmap, &cmaparray)); 3186 PetscCall(PetscMalloc1(ncols, &idx)); 3187 for (i = 0; i < ncols; i++) { 3188 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3189 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3190 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3191 } 3192 PetscCall(VecRestoreArray(x, &xarray)); 3193 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3194 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3195 3196 /* Get iscol_d */ 3197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3198 PetscCall(ISGetBlockSize(iscol, &i)); 3199 PetscCall(ISSetBlockSize(*iscol_d, i)); 3200 3201 /* Get isrow_d */ 3202 PetscCall(ISGetLocalSize(isrow, &m)); 3203 rstart = mat->rmap->rstart; 3204 PetscCall(PetscMalloc1(m, &idx)); 3205 PetscCall(ISGetIndices(isrow, &is_idx)); 3206 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3207 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3208 3209 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3210 PetscCall(ISGetBlockSize(isrow, &i)); 3211 PetscCall(ISSetBlockSize(*isrow_d, i)); 3212 3213 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3214 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3215 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3216 3217 PetscCall(VecDuplicate(lvec, &lcmap)); 3218 3219 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3220 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3221 3222 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3223 /* off-process column indices */ 3224 count = 0; 3225 PetscCall(PetscMalloc1(Bn, &idx)); 3226 PetscCall(PetscMalloc1(Bn, &cmap1)); 3227 3228 PetscCall(VecGetArray(lvec, &xarray)); 3229 PetscCall(VecGetArray(lcmap, &cmaparray)); 3230 for (i = 0; i < Bn; i++) { 3231 if (PetscRealPart(xarray[i]) > -1.0) { 3232 idx[count] = i; /* local column index in off-diagonal part B */ 3233 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3234 count++; 3235 } 3236 } 3237 PetscCall(VecRestoreArray(lvec, &xarray)); 3238 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3239 3240 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3241 /* cannot ensure iscol_o has same blocksize as iscol! */ 3242 3243 PetscCall(PetscFree(idx)); 3244 *garray = cmap1; 3245 3246 PetscCall(VecDestroy(&x)); 3247 PetscCall(VecDestroy(&cmap)); 3248 PetscCall(VecDestroy(&lcmap)); 3249 PetscFunctionReturn(PETSC_SUCCESS); 3250 } 3251 3252 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3253 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3254 { 3255 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3256 Mat M = NULL; 3257 MPI_Comm comm; 3258 IS iscol_d, isrow_d, iscol_o; 3259 Mat Asub = NULL, Bsub = NULL; 3260 PetscInt n; 3261 3262 PetscFunctionBegin; 3263 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3264 3265 if (call == MAT_REUSE_MATRIX) { 3266 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3267 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3268 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3269 3270 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3271 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3272 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3274 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3275 3276 /* Update diagonal and off-diagonal portions of submat */ 3277 asub = (Mat_MPIAIJ *)(*submat)->data; 3278 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3279 PetscCall(ISGetLocalSize(iscol_o, &n)); 3280 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3281 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3282 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3283 3284 } else { /* call == MAT_INITIAL_MATRIX) */ 3285 const PetscInt *garray; 3286 PetscInt BsubN; 3287 3288 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3289 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3290 3291 /* Create local submatrices Asub and Bsub */ 3292 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3293 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3294 3295 /* Create submatrix M */ 3296 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3297 3298 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3299 asub = (Mat_MPIAIJ *)M->data; 3300 3301 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3302 n = asub->B->cmap->N; 3303 if (BsubN > n) { 3304 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3305 const PetscInt *idx; 3306 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3307 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3308 3309 PetscCall(PetscMalloc1(n, &idx_new)); 3310 j = 0; 3311 PetscCall(ISGetIndices(iscol_o, &idx)); 3312 for (i = 0; i < n; i++) { 3313 if (j >= BsubN) break; 3314 while (subgarray[i] > garray[j]) j++; 3315 3316 if (subgarray[i] == garray[j]) { 3317 idx_new[i] = idx[j++]; 3318 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3319 } 3320 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3321 3322 PetscCall(ISDestroy(&iscol_o)); 3323 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3324 3325 } else if (BsubN < n) { 3326 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3327 } 3328 3329 PetscCall(PetscFree(garray)); 3330 *submat = M; 3331 3332 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateSubMatrix_MPIAIJ - Builds (or refills) the parallel submatrix of mat selected by isrow and iscol,
  dispatching to one of three implementations:

    MatCreateSubMatrix_MPIAIJ_SameRowColDist() - on every rank, the local parts of both isrow and iscol are empty
                                                 or lie inside the rank's row/column ownership range of mat
    MatCreateSubMatrix_MPIAIJ_SameRowDist()    - only isrow satisfies that test and the sequential column IS
                                                 obtained from ISGetSeqIS_Private() is sorted
    MatCreateSubMatrix_MPIAIJ_nonscalable()    - every other case

  Input Parameters:
+ mat   - the matrix to extract from
. isrow - rows to extract
. iscol - columns to extract
- call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
. newmat - the submatrix; with MAT_REUSE_MATRIX it must be a matrix produced by an earlier call, because the path
           is recovered from the objects composed on it ("isrow_d", "SubIScol" or "ISAllGather")
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS iscol_local = NULL, isrow_d;
  PetscInt csize;
  PetscInt n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* No communication here: the path taken by the initial call is identified by what it composed on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
    /* if neither object was found sameRowDist stays PETSC_FALSE and tsameDist[] is never read */
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* logical AND over all ranks: a property holds only if it holds everywhere */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted: fall through to the general case below, which reuses the iscol_local created above */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep the gathered IS on the result for later MAT_REUSE_MATRIX calls, then drop the local reference */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. A - "diagonal" portion of matrix
. B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ *maij;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt *oi = b->i, *oj = b->j, i, nz, col; /* oi/oj: row offsets and column indices of B; oj is rewritten in place below */
  const PetscScalar *oa;
  Mat Bnew;
  PetscInt m, n, N;
  MatType mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* the two sequential blocks are attached directly below, so no preallocation routine is called */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* oi[m] is the number of stored entries of B; replace each column index of B by the global column garray[] gives for it */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  /* Bnew has N columns and is built on the very same oi/oj/oa arrays that B holds */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand the arrays over: B must not free them when it is destroyed here ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ... and Bnew becomes responsible for releasing them */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  /* MAT_NO_OFF_PROC_ENTRIES is switched on only around this assembly and switched off again afterwards */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *); 3541 3542 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3543 { 3544 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3545 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3546 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3547 Mat M, Msub, B = a->B; 3548 MatScalar *aa; 3549 Mat_SeqAIJ *aij; 3550 PetscInt *garray = a->garray, *colsub, Ncols; 3551 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3552 IS iscol_sub, iscmap; 3553 const PetscInt *is_idx, *cmap; 3554 PetscBool allcolumns = PETSC_FALSE; 3555 MPI_Comm comm; 3556 3557 PetscFunctionBegin; 3558 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3559 if (call == MAT_REUSE_MATRIX) { 3560 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3561 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3562 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3563 3564 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3565 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3566 3567 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3568 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3569 3570 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3571 3572 } else { /* call == MAT_INITIAL_MATRIX) */ 3573 PetscBool flg; 3574 3575 PetscCall(ISGetLocalSize(iscol, &n)); 3576 PetscCall(ISGetSize(iscol, &Ncols)); 3577 3578 /* (1) iscol -> nonscalable iscol_local */ 3579 /* Check for special case: each processor gets entire matrix columns */ 3580 PetscCall(ISIdentity(iscol_local, &flg)); 3581 if (flg 
&& n == mat->cmap->N) allcolumns = PETSC_TRUE; 3582 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3583 if (allcolumns) { 3584 iscol_sub = iscol_local; 3585 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3586 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3587 3588 } else { 3589 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3590 PetscInt *idx, *cmap1, k; 3591 PetscCall(PetscMalloc1(Ncols, &idx)); 3592 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3593 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3594 count = 0; 3595 k = 0; 3596 for (i = 0; i < Ncols; i++) { 3597 j = is_idx[i]; 3598 if (j >= cstart && j < cend) { 3599 /* diagonal part of mat */ 3600 idx[count] = j; 3601 cmap1[count++] = i; /* column index in submat */ 3602 } else if (Bn) { 3603 /* off-diagonal part of mat */ 3604 if (j == garray[k]) { 3605 idx[count] = j; 3606 cmap1[count++] = i; /* column index in submat */ 3607 } else if (j > garray[k]) { 3608 while (j > garray[k] && k < Bn - 1) k++; 3609 if (j == garray[k]) { 3610 idx[count] = j; 3611 cmap1[count++] = i; /* column index in submat */ 3612 } 3613 } 3614 } 3615 } 3616 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3617 3618 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3619 PetscCall(ISGetBlockSize(iscol, &cbs)); 3620 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3621 3622 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3623 } 3624 3625 /* (3) Create sequential Msub */ 3626 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3627 } 3628 3629 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3630 aij = (Mat_SeqAIJ *)(Msub)->data; 3631 ii = aij->i; 3632 PetscCall(ISGetIndices(iscmap, &cmap)); 3633 3634 /* 3635 
m - number of local rows 3636 Ncols - number of columns (same on all processors) 3637 rstart - first row in new global matrix generated 3638 */ 3639 PetscCall(MatGetSize(Msub, &m, NULL)); 3640 3641 if (call == MAT_INITIAL_MATRIX) { 3642 /* (4) Create parallel newmat */ 3643 PetscMPIInt rank, size; 3644 PetscInt csize; 3645 3646 PetscCallMPI(MPI_Comm_size(comm, &size)); 3647 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3648 3649 /* 3650 Determine the number of non-zeros in the diagonal and off-diagonal 3651 portions of the matrix in order to do correct preallocation 3652 */ 3653 3654 /* first get start and end of "diagonal" columns */ 3655 PetscCall(ISGetLocalSize(iscol, &csize)); 3656 if (csize == PETSC_DECIDE) { 3657 PetscCall(ISGetSize(isrow, &mglobal)); 3658 if (mglobal == Ncols) { /* square matrix */ 3659 nlocal = m; 3660 } else { 3661 nlocal = Ncols / size + ((Ncols % size) > rank); 3662 } 3663 } else { 3664 nlocal = csize; 3665 } 3666 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3667 rstart = rend - nlocal; 3668 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3669 3670 /* next, compute all the lengths */ 3671 jj = aij->j; 3672 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3673 olens = dlens + m; 3674 for (i = 0; i < m; i++) { 3675 jend = ii[i + 1] - ii[i]; 3676 olen = 0; 3677 dlen = 0; 3678 for (j = 0; j < jend; j++) { 3679 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3680 else dlen++; 3681 jj++; 3682 } 3683 olens[i] = olen; 3684 dlens[i] = dlen; 3685 } 3686 3687 PetscCall(ISGetBlockSize(isrow, &bs)); 3688 PetscCall(ISGetBlockSize(iscol, &cbs)); 3689 3690 PetscCall(MatCreate(comm, &M)); 3691 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3692 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3693 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3694 
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3695 PetscCall(PetscFree(dlens)); 3696 3697 } else { /* call == MAT_REUSE_MATRIX */ 3698 M = *newmat; 3699 PetscCall(MatGetLocalSize(M, &i, NULL)); 3700 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3701 PetscCall(MatZeroEntries(M)); 3702 /* 3703 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3704 rather than the slower MatSetValues(). 3705 */ 3706 M->was_assembled = PETSC_TRUE; 3707 M->assembled = PETSC_FALSE; 3708 } 3709 3710 /* (5) Set values of Msub to *newmat */ 3711 PetscCall(PetscMalloc1(count, &colsub)); 3712 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3713 3714 jj = aij->j; 3715 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3716 for (i = 0; i < m; i++) { 3717 row = rstart + i; 3718 nz = ii[i + 1] - ii[i]; 3719 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3720 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3721 jj += nz; 3722 aa += nz; 3723 } 3724 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3725 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3726 3727 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3728 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3729 3730 PetscCall(PetscFree(colsub)); 3731 3732 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3733 if (call == MAT_INITIAL_MATRIX) { 3734 *newmat = M; 3735 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3736 PetscCall(MatDestroy(&Msub)); 3737 3738 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3739 PetscCall(ISDestroy(&iscol_sub)); 3740 3741 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3742 PetscCall(ISDestroy(&iscmap)); 3743 3744 if (iscol_local) { 3745 PetscCall(PetscObjectCompose((PetscObject)*newmat, 
"ISAllGather", (PetscObject)iscol_local)); 3746 PetscCall(ISDestroy(&iscol_local)); 3747 } 3748 } 3749 PetscFunctionReturn(PETSC_SUCCESS); 3750 } 3751 3752 /* 3753 Not great since it makes two copies of the submatrix, first an SeqAIJ 3754 in local and then by concatenating the local matrices the end result. 3755 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3756 3757 This requires a sequential iscol with all indices. 3758 */ 3759 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3760 { 3761 PetscMPIInt rank, size; 3762 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3763 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3764 Mat M, Mreuse; 3765 MatScalar *aa, *vwork; 3766 MPI_Comm comm; 3767 Mat_SeqAIJ *aij; 3768 PetscBool colflag, allcolumns = PETSC_FALSE; 3769 3770 PetscFunctionBegin; 3771 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3772 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3773 PetscCallMPI(MPI_Comm_size(comm, &size)); 3774 3775 /* Check for special case: each processor gets entire matrix columns */ 3776 PetscCall(ISIdentity(iscol, &colflag)); 3777 PetscCall(ISGetLocalSize(iscol, &n)); 3778 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3779 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3780 3781 if (call == MAT_REUSE_MATRIX) { 3782 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3783 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3784 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3785 } else { 3786 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3787 } 3788 3789 /* 3790 m - number of 
local rows 3791 n - number of columns (same on all processors) 3792 rstart - first row in new global matrix generated 3793 */ 3794 PetscCall(MatGetSize(Mreuse, &m, &n)); 3795 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3796 if (call == MAT_INITIAL_MATRIX) { 3797 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3798 ii = aij->i; 3799 jj = aij->j; 3800 3801 /* 3802 Determine the number of non-zeros in the diagonal and off-diagonal 3803 portions of the matrix in order to do correct preallocation 3804 */ 3805 3806 /* first get start and end of "diagonal" columns */ 3807 if (csize == PETSC_DECIDE) { 3808 PetscCall(ISGetSize(isrow, &mglobal)); 3809 if (mglobal == n) { /* square matrix */ 3810 nlocal = m; 3811 } else { 3812 nlocal = n / size + ((n % size) > rank); 3813 } 3814 } else { 3815 nlocal = csize; 3816 } 3817 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3818 rstart = rend - nlocal; 3819 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3820 3821 /* next, compute all the lengths */ 3822 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3823 olens = dlens + m; 3824 for (i = 0; i < m; i++) { 3825 jend = ii[i + 1] - ii[i]; 3826 olen = 0; 3827 dlen = 0; 3828 for (j = 0; j < jend; j++) { 3829 if (*jj < rstart || *jj >= rend) olen++; 3830 else dlen++; 3831 jj++; 3832 } 3833 olens[i] = olen; 3834 dlens[i] = dlen; 3835 } 3836 PetscCall(MatCreate(comm, &M)); 3837 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3838 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3839 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3840 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3841 PetscCall(PetscFree(dlens)); 3842 } else { 3843 PetscInt ml, nl; 3844 3845 M = *newmat; 3846 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3847 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3848 PetscCall(MatZeroEntries(M)); 3849 /* 3850 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3851 rather than the slower MatSetValues(). 3852 */ 3853 M->was_assembled = PETSC_TRUE; 3854 M->assembled = PETSC_FALSE; 3855 } 3856 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3857 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3858 ii = aij->i; 3859 jj = aij->j; 3860 3861 /* trigger copy to CPU if needed */ 3862 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3863 for (i = 0; i < m; i++) { 3864 row = rstart + i; 3865 nz = ii[i + 1] - ii[i]; 3866 cwork = jj; 3867 jj = PetscSafePointerPlusOffset(jj, nz); 3868 vwork = aa; 3869 aa = PetscSafePointerPlusOffset(aa, nz); 3870 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3871 } 3872 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3873 3874 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3875 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3876 *newmat = M; 3877 3878 /* save submatrix used in processor for next request */ 3879 if (call == MAT_INITIAL_MATRIX) { 3880 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3881 PetscCall(MatDestroy(&Mreuse)); 3882 } 3883 PetscFunctionReturn(PETSC_SUCCESS); 3884 } 3885 3886 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3887 { 3888 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3889 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3890 const PetscInt *JJ; 3891 PetscBool nooffprocentries; 3892 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3893 3894 PetscFunctionBegin; 3895 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3896 3897 PetscCall(PetscLayoutSetUp(B->rmap)); 3898 PetscCall(PetscLayoutSetUp(B->cmap)); 3899 m = B->rmap->n; 3900 cstart = B->cmap->rstart; 3901 cend = B->cmap->rend; 3902 
rstart = B->rmap->rstart;

  /* per-row counts of entries inside / outside the column range [cstart, cend) */
  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* debug builds only: reject negative row lengths and check the first and last column index of every row */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ = PetscSafePointerPlusOffset(J, Ii[i]);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    JJ = PetscSafePointerPlusOffset(J, Ii[i]);
    /* NOTE(review): nnz_max is accumulated here but never read anywhere in this function */
    nnz_max = PetscMax(nnz_max, nnz);
    d = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* insert each local row; local row i is global row i + rstart */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
  }
  /* only locally owned rows were set above, so assemble with nooffprocentries forced on, then put the user's setting back */
  nooffprocentries = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  /* ld[i] = number of leading entries of row i whose column index is < cstart */
  /* NOTE(review): this equals the number of entries left of the diagonal block only when each row's columns are sorted */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz; /* J is the local copy of the parameter, advanced to the next row */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is 3986 as shown 3987 .vb 3988 1 0 0 3989 2 0 3 P0 3990 ------- 3991 4 5 6 P1 3992 3993 Process0 [P0] rows_owned=[0,1] 3994 i = {0,1,3} [size = nrow+1 = 2+1] 3995 j = {0,0,2} [size = 3] 3996 v = {1,2,3} [size = 3] 3997 3998 Process1 [P1] rows_owned=[2] 3999 i = {0,3} [size = nrow+1 = 1+1] 4000 j = {0,1,2} [size = 3] 4001 v = {4,5,6} [size = 3] 4002 .ve 4003 4004 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4005 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4006 @*/ 4007 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4008 { 4009 PetscFunctionBegin; 4010 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4011 PetscFunctionReturn(PETSC_SUCCESS); 4012 } 4013 4014 /*@C 4015 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4016 (the default parallel PETSc format). For good matrix assembly performance 4017 the user should preallocate the matrix storage by setting the parameters 4018 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4019 4020 Collective 4021 4022 Input Parameters: 4023 + B - the matrix 4024 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4025 (same value is used for all local rows) 4026 . d_nnz - array containing the number of nonzeros in the various rows of the 4027 DIAGONAL portion of the local submatrix (possibly different for each row) 4028 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4029 The size of this array is equal to the number of local rows, i.e 'm'. 
For matrices that will be factored, you must leave room for (and set)
  the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
  submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
  OFF-DIAGONAL portion of the local submatrix (possibly different for
  each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
  structure. The size of this array is equal to the number
  of local rows, i.e. 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices.
For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e. 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4119 4120 The DIAGONAL portion of the local submatrix of a processor can be defined 4121 as the submatrix which is obtained by extraction the part corresponding to 4122 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4123 first row that belongs to the processor, r2 is the last row belonging to 4124 the this processor, and c1-c2 is range of indices of the local part of a 4125 vector suitable for applying the matrix to. This is an mxn matrix. In the 4126 common case of a square matrix, the row and column ranges are the same and 4127 the DIAGONAL part is also square. The remaining portion of the local 4128 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4129 4130 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4131 4132 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4133 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4134 You can also run with the option `-info` and look for messages with the string 4135 malloc in them to see if additional memory allocation was needed. 4136 4137 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4138 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4139 @*/ 4140 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4141 { 4142 PetscFunctionBegin; 4143 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4144 PetscValidType(B, 1); 4145 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4146 PetscFunctionReturn(PETSC_SUCCESS); 4147 } 4148 4149 /*@ 4150 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4151 CSR format for the local rows. 
4152 4153 Collective 4154 4155 Input Parameters: 4156 + comm - MPI communicator 4157 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4158 . n - This value should be the same as the local size used in creating the 4159 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4160 calculated if `N` is given) For square matrices n is almost always `m`. 4161 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4162 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4163 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4164 . j - global column indices 4165 - a - optional matrix values 4166 4167 Output Parameter: 4168 . mat - the matrix 4169 4170 Level: intermediate 4171 4172 Notes: 4173 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4174 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4175 called this routine. Use `MatCreateMPIAIJWithSplitArray()` to avoid needing to copy the arrays. 4176 4177 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4178 4179 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4180 4181 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4182 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4183 4184 The format which is used for the sparse matrix input, is equivalent to a 4185 row-major ordering.. 
i.e for the following matrix, the input data expected is 4186 as shown 4187 .vb 4188 1 0 0 4189 2 0 3 P0 4190 ------- 4191 4 5 6 P1 4192 4193 Process0 [P0] rows_owned=[0,1] 4194 i = {0,1,3} [size = nrow+1 = 2+1] 4195 j = {0,0,2} [size = 3] 4196 v = {1,2,3} [size = 3] 4197 4198 Process1 [P1] rows_owned=[2] 4199 i = {0,3} [size = nrow+1 = 1+1] 4200 j = {0,1,2} [size = 3] 4201 v = {4,5,6} [size = 3] 4202 .ve 4203 4204 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4205 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4206 @*/ 4207 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4208 { 4209 PetscFunctionBegin; 4210 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4211 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4212 PetscCall(MatCreate(comm, mat)); 4213 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4214 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4215 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4216 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4217 PetscFunctionReturn(PETSC_SUCCESS); 4218 } 4219 4220 /*@ 4221 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4222 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4223 from `MatCreateMPIAIJWithArrays()` 4224 4225 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4226 4227 Collective 4228 4229 Input Parameters: 4230 + mat - the matrix 4231 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4232 . 
n - This value should be the same as the local size used in creating the 4233 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4234 calculated if N is given) For square matrices n is almost always m. 4235 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4236 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4237 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4238 . J - column indices 4239 - v - matrix values 4240 4241 Level: deprecated 4242 4243 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4244 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4245 @*/ 4246 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4247 { 4248 PetscInt nnz, i; 4249 PetscBool nooffprocentries; 4250 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4251 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4252 PetscScalar *ad, *ao; 4253 PetscInt ldi, Iii, md; 4254 const PetscInt *Adi = Ad->i; 4255 PetscInt *ld = Aij->ld; 4256 4257 PetscFunctionBegin; 4258 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4259 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4260 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4261 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4262 4263 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, 
&ad)); 4264 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4265 4266 for (i = 0; i < m; i++) { 4267 if (PetscDefined(USE_DEBUG)) { 4268 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4269 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4270 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4271 } 4272 } 4273 nnz = Ii[i + 1] - Ii[i]; 4274 Iii = Ii[i]; 4275 ldi = ld[i]; 4276 md = Adi[i + 1] - Adi[i]; 4277 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4278 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4279 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4280 ad += md; 4281 ao += nnz - md; 4282 } 4283 nooffprocentries = mat->nooffprocentries; 4284 mat->nooffprocentries = PETSC_TRUE; 4285 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4286 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4287 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4288 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4289 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4290 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4291 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4292 mat->nooffprocentries = nooffprocentries; 4293 PetscFunctionReturn(PETSC_SUCCESS); 4294 } 4295 4296 /*@ 4297 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4298 4299 Collective 4300 4301 Input Parameters: 4302 + mat - the matrix 4303 - v - matrix values, stored by row 4304 4305 Level: intermediate 4306 4307 Notes: 4308 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4309 4310 The column indices in the call 
to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4311 4312 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4313 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4314 @*/ 4315 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4316 { 4317 PetscInt nnz, i, m; 4318 PetscBool nooffprocentries; 4319 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4320 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4321 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4322 PetscScalar *ad, *ao; 4323 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4324 PetscInt ldi, Iii, md; 4325 PetscInt *ld = Aij->ld; 4326 4327 PetscFunctionBegin; 4328 m = mat->rmap->n; 4329 4330 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4331 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4332 Iii = 0; 4333 for (i = 0; i < m; i++) { 4334 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4335 ldi = ld[i]; 4336 md = Adi[i + 1] - Adi[i]; 4337 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4338 ad += md; 4339 if (ao) { 4340 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4341 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4342 ao += nnz - md; 4343 } 4344 Iii += nnz; 4345 } 4346 nooffprocentries = mat->nooffprocentries; 4347 mat->nooffprocentries = PETSC_TRUE; 4348 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4349 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4350 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4351 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4352 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4353 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4354 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4355 mat->nooffprocentries = nooffprocentries; 4356 
PetscFunctionReturn(PETSC_SUCCESS); 4357 } 4358 4359 /*@C 4360 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4361 (the default parallel PETSc format). For good matrix assembly performance 4362 the user should preallocate the matrix storage by setting the parameters 4363 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4364 4365 Collective 4366 4367 Input Parameters: 4368 + comm - MPI communicator 4369 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4370 This value should be the same as the local size used in creating the 4371 y vector for the matrix-vector product y = Ax. 4372 . n - This value should be the same as the local size used in creating the 4373 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4374 calculated if N is given) For square matrices n is almost always m. 4375 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4376 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4377 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4378 (same value is used for all local rows) 4379 . d_nnz - array containing the number of nonzeros in the various rows of the 4380 DIAGONAL portion of the local submatrix (possibly different for each row) 4381 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4382 The size of this array is equal to the number of local rows, i.e 'm'. 4383 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4384 submatrix (same value is used for all local rows). 4385 - o_nnz - array containing the number of nonzeros in the various rows of the 4386 OFF-DIAGONAL portion of the local submatrix (possibly different for 4387 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4388 structure. The size of this array is equal to the number 4389 of local rows, i.e 'm'. 4390 4391 Output Parameter: 4392 . 
A - the matrix 4393 4394 Options Database Keys: 4395 + -mat_no_inode - Do not use inodes 4396 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4397 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4398 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix. 4399 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4400 4401 Level: intermediate 4402 4403 Notes: 4404 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4405 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4406 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4407 4408 If the *_nnz parameter is given then the *_nz parameter is ignored 4409 4410 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4411 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4412 storage requirements for this matrix. 4413 4414 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4415 processor then it must be used on all processors that share the object for 4416 that argument. 4417 4418 The user MUST specify either the local or global matrix dimensions 4419 (possibly both). 4420 4421 The parallel matrix is partitioned across processors such that the 4422 first m0 rows belong to process 0, the next m1 rows belong to 4423 process 1, the next m2 rows belong to process 2 etc.. where 4424 m0,m1,m2,.. are the input parameter 'm'. i.e., each processor stores 4425 values corresponding to [m x N] submatrix. 4426 4427 The columns are logically partitioned with the n0 columns belonging 4428 to 0th partition, the next n1 columns belonging to the next 4429 partition etc.. where n0,n1,n2... are the input parameter 'n'.
4430 4431 The DIAGONAL portion of the local submatrix on any given processor 4432 is the submatrix corresponding to the rows and columns m,n 4433 corresponding to the given processor. i.e diagonal matrix on 4434 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4435 etc. The remaining portion of the local submatrix [m x (N-n)] 4436 constitute the OFF-DIAGONAL portion. The example below better 4437 illustrates this concept. 4438 4439 For a square global matrix we define each processor's diagonal portion 4440 to be its local rows and the corresponding columns (a square submatrix); 4441 each processor's off-diagonal portion encompasses the remainder of the 4442 local matrix (a rectangular submatrix). 4443 4444 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4445 4446 When calling this routine with a single process communicator, a matrix of 4447 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4448 type of communicator, use the construction mechanism 4449 .vb 4450 MatCreate(..., &A); 4451 MatSetType(A, MATMPIAIJ); 4452 MatSetSizes(A, m, n, M, N); 4453 MatMPIAIJSetPreallocation(A, ...); 4454 .ve 4455 4456 By default, this format uses inodes (identical nodes) when possible. 4457 We search for consecutive rows with the same nonzero structure, thereby 4458 reusing matrix information to achieve increased efficiency. 4459 4460 Example Usage: 4461 Consider the following 8x8 matrix with 34 non-zero values, that is 4462 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4463 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4464 as follows 4465 4466 .vb 4467 1 2 0 | 0 3 0 | 0 4 4468 Proc0 0 5 6 | 7 0 0 | 8 0 4469 9 0 10 | 11 0 0 | 12 0 4470 ------------------------------------- 4471 13 0 14 | 15 16 17 | 0 0 4472 Proc1 0 18 0 | 19 20 21 | 0 0 4473 0 0 0 | 22 23 0 | 24 0 4474 ------------------------------------- 4475 Proc2 25 26 27 | 0 0 28 | 29 0 4476 30 0 0 | 31 32 33 | 0 34 4477 .ve 4478 4479 This can be represented as a collection of submatrices as 4480 4481 .vb 4482 A B C 4483 D E F 4484 G H I 4485 .ve 4486 4487 Where the submatrices A,B,C are owned by proc0, D,E,F are 4488 owned by proc1, G,H,I are owned by proc2. 4489 4490 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4491 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4492 The 'M','N' parameters are 8,8, and have the same values on all procs. 4493 4494 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4495 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4496 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4497 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4498 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4499 matrix, and [DF] as another `MATSEQAIJ` matrix. 4500 4501 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4502 allocated for every row of the local diagonal submatrix, and `o_nz` 4503 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4504 One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local 4505 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4506 In this case, the values of `d_nz`,`o_nz` are 4507 .vb 4508 proc0 d_nz = 2, o_nz = 2 4509 proc1 d_nz = 3, o_nz = 2 4510 proc2 d_nz = 1, o_nz = 4 4511 .ve 4512 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc.
This 4513 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4514 for proc3. i.e we are using 12+15+10=37 storage locations to store 4515 34 values. 4516 4517 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4518 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4519 In the above case the values for d_nnz,o_nnz are 4520 .vb 4521 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4522 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4523 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4524 .ve 4525 Here the space allocated is sum of all the above values i.e 34, and 4526 hence pre-allocation is perfect. 4527 4528 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4529 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4530 @*/ 4531 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4532 { 4533 PetscMPIInt size; 4534 4535 PetscFunctionBegin; 4536 PetscCall(MatCreate(comm, A)); 4537 PetscCall(MatSetSizes(*A, m, n, M, N)); 4538 PetscCallMPI(MPI_Comm_size(comm, &size)); 4539 if (size > 1) { 4540 PetscCall(MatSetType(*A, MATMPIAIJ)); 4541 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4542 } else { 4543 PetscCall(MatSetType(*A, MATSEQAIJ)); 4544 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4545 } 4546 PetscFunctionReturn(PETSC_SUCCESS); 4547 } 4548 4549 /*MC 4550 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4551 4552 Synopsis: 4553 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4554 4555 Not Collective 4556 4557 Input Parameter: 4558 . A - the `MATMPIAIJ` matrix 4559 4560 Output Parameters: 4561 + Ad - the diagonal portion of the matrix 4562 . 
Ao - the off-diagonal portion of the matrix 4563 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4564 - ierr - error code 4565 4566 Level: advanced 4567 4568 Note: 4569 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4570 4571 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4572 M*/ 4573 4574 /*MC 4575 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4576 4577 Synopsis: 4578 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4579 4580 Not Collective 4581 4582 Input Parameters: 4583 + A - the `MATMPIAIJ` matrix 4584 . Ad - the diagonal portion of the matrix 4585 . Ao - the off-diagonal portion of the matrix 4586 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4587 - ierr - error code 4588 4589 Level: advanced 4590 4591 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4592 M*/ 4593 4594 /*@C 4595 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4596 4597 Not Collective 4598 4599 Input Parameter: 4600 . A - The `MATMPIAIJ` matrix 4601 4602 Output Parameters: 4603 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4604 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4605 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4606 4607 Level: intermediate 4608 4609 Note: 4610 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4611 in `Ad` are in [0, Nc) where Nc is the number of local columns. 
The columns are `Ao` are in [0, Nco), where Nco is 4612 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4613 local column numbers to global column numbers in the original matrix. 4614 4615 Fortran Notes: 4616 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4617 4618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4619 @*/ 4620 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4621 { 4622 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4623 PetscBool flg; 4624 4625 PetscFunctionBegin; 4626 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4627 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4628 if (Ad) *Ad = a->A; 4629 if (Ao) *Ao = a->B; 4630 if (colmap) *colmap = a->garray; 4631 PetscFunctionReturn(PETSC_SUCCESS); 4632 } 4633 4634 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4635 { 4636 PetscInt m, N, i, rstart, nnz, Ii; 4637 PetscInt *indx; 4638 PetscScalar *values; 4639 MatType rootType; 4640 4641 PetscFunctionBegin; 4642 PetscCall(MatGetSize(inmat, &m, &N)); 4643 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4644 PetscInt *dnz, *onz, sum, bs, cbs; 4645 4646 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4647 /* Check sum(n) = N */ 4648 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4649 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4650 4651 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4652 rstart -= m; 4653 4654 MatPreallocateBegin(comm, 
m, n, dnz, onz); 4655 for (i = 0; i < m; i++) { 4656 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4657 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4658 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4659 } 4660 4661 PetscCall(MatCreate(comm, outmat)); 4662 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4663 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4664 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4665 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4666 PetscCall(MatSetType(*outmat, rootType)); 4667 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4668 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4669 MatPreallocateEnd(dnz, onz); 4670 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4671 } 4672 4673 /* numeric phase */ 4674 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4675 for (i = 0; i < m; i++) { 4676 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4677 Ii = i + rstart; 4678 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4679 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4680 } 4681 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4682 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4683 PetscFunctionReturn(PETSC_SUCCESS); 4684 } 4685 4686 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4687 { 4688 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4689 4690 PetscFunctionBegin; 4691 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4692 PetscCall(PetscFree(merge->id_r)); 4693 PetscCall(PetscFree(merge->len_s)); 4694 PetscCall(PetscFree(merge->len_r)); 4695 PetscCall(PetscFree(merge->bi)); 4696 PetscCall(PetscFree(merge->bj)); 4697 PetscCall(PetscFree(merge->buf_ri[0])); 4698 PetscCall(PetscFree(merge->buf_ri)); 4699 PetscCall(PetscFree(merge->buf_rj[0])); 4700 PetscCall(PetscFree(merge->buf_rj)); 4701 
PetscCall(PetscFree(merge->coi)); 4702 PetscCall(PetscFree(merge->coj)); 4703 PetscCall(PetscFree(merge->owners_co)); 4704 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4705 PetscCall(PetscFree(merge)); 4706 PetscFunctionReturn(PETSC_SUCCESS); 4707 } 4708 4709 #include <../src/mat/utils/freespace.h> 4710 #include <petscbt.h> 4711 4712 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4713 { 4714 MPI_Comm comm; 4715 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4716 PetscMPIInt size, rank, taga, *len_s; 4717 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4718 PetscInt proc, m; 4719 PetscInt **buf_ri, **buf_rj; 4720 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4721 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4722 MPI_Request *s_waits, *r_waits; 4723 MPI_Status *status; 4724 const MatScalar *aa, *a_a; 4725 MatScalar **abuf_r, *ba_i; 4726 Mat_Merge_SeqsToMPI *merge; 4727 PetscContainer container; 4728 4729 PetscFunctionBegin; 4730 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4731 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4732 4733 PetscCallMPI(MPI_Comm_size(comm, &size)); 4734 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4735 4736 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4737 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4738 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4739 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4740 aa = a_a; 4741 4742 bi = merge->bi; 4743 bj = merge->bj; 4744 buf_ri = merge->buf_ri; 4745 buf_rj = merge->buf_rj; 4746 4747 PetscCall(PetscMalloc1(size, &status)); 4748 owners = merge->rowmap->range; 4749 len_s = merge->len_s; 4750 4751 /* send and recv matrix values */ 4752 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4753 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, 
merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4754 4755 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4756 for (proc = 0, k = 0; proc < size; proc++) { 4757 if (!len_s[proc]) continue; 4758 i = owners[proc]; 4759 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4760 k++; 4761 } 4762 4763 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4764 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4765 PetscCall(PetscFree(status)); 4766 4767 PetscCall(PetscFree(s_waits)); 4768 PetscCall(PetscFree(r_waits)); 4769 4770 /* insert mat values of mpimat */ 4771 PetscCall(PetscMalloc1(N, &ba_i)); 4772 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4773 4774 for (k = 0; k < merge->nrecv; k++) { 4775 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4776 nrows = *buf_ri_k[k]; 4777 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4778 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4779 } 4780 4781 /* set values of ba */ 4782 m = merge->rowmap->n; 4783 for (i = 0; i < m; i++) { 4784 arow = owners[rank] + i; 4785 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4786 bnzi = bi[i + 1] - bi[i]; 4787 PetscCall(PetscArrayzero(ba_i, bnzi)); 4788 4789 /* add local non-zero vals of this proc's seqmat into ba */ 4790 anzi = ai[arow + 1] - ai[arow]; 4791 aj = a->j + ai[arow]; 4792 aa = a_a + ai[arow]; 4793 nextaj = 0; 4794 for (j = 0; nextaj < anzi; j++) { 4795 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4796 ba_i[j] += aa[nextaj++]; 4797 } 4798 } 4799 4800 /* add received vals into ba */ 4801 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4802 /* i-th row */ 4803 if (i == *nextrow[k]) { 4804 anzi = *(nextai[k] + 1) - *nextai[k]; 4805 aj = buf_rj[k] + *nextai[k]; 4806 aa = abuf_r[k] + *nextai[k]; 
4807 nextaj = 0; 4808 for (j = 0; nextaj < anzi; j++) { 4809 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4810 ba_i[j] += aa[nextaj++]; 4811 } 4812 } 4813 nextrow[k]++; 4814 nextai[k]++; 4815 } 4816 } 4817 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4818 } 4819 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4820 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4821 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4822 4823 PetscCall(PetscFree(abuf_r[0])); 4824 PetscCall(PetscFree(abuf_r)); 4825 PetscCall(PetscFree(ba_i)); 4826 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4827 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4828 PetscFunctionReturn(PETSC_SUCCESS); 4829 } 4830 4831 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4832 { 4833 Mat B_mpi; 4834 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4835 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4836 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4837 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4838 PetscInt len, proc, *dnz, *onz, bs, cbs; 4839 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4840 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4841 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4842 MPI_Status *status; 4843 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4844 PetscBT lnkbt; 4845 Mat_Merge_SeqsToMPI *merge; 4846 PetscContainer container; 4847 4848 PetscFunctionBegin; 4849 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4850 4851 /* make sure it is a PETSc comm */ 4852 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4853 PetscCallMPI(MPI_Comm_size(comm, &size)); 4854 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4855 4856 PetscCall(PetscNew(&merge)); 4857 PetscCall(PetscMalloc1(size, &status)); 4858 4859 /* determine row ownership */ 4860 
PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4861 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4862 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4863 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4864 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4865 PetscCall(PetscMalloc1(size, &len_si)); 4866 PetscCall(PetscMalloc1(size, &merge->len_s)); 4867 4868 m = merge->rowmap->n; 4869 owners = merge->rowmap->range; 4870 4871 /* determine the number of messages to send, their lengths */ 4872 len_s = merge->len_s; 4873 4874 len = 0; /* length of buf_si[] */ 4875 merge->nsend = 0; 4876 for (proc = 0; proc < size; proc++) { 4877 len_si[proc] = 0; 4878 if (proc == rank) { 4879 len_s[proc] = 0; 4880 } else { 4881 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4882 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4883 } 4884 if (len_s[proc]) { 4885 merge->nsend++; 4886 nrows = 0; 4887 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4888 if (ai[i + 1] > ai[i]) nrows++; 4889 } 4890 len_si[proc] = 2 * (nrows + 1); 4891 len += len_si[proc]; 4892 } 4893 } 4894 4895 /* determine the number and length of messages to receive for ij-structure */ 4896 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4897 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4898 4899 /* post the Irecv of j-structure */ 4900 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4901 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4902 4903 /* post the Isend of j-structure */ 4904 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4905 4906 for (proc = 0, k = 0; proc < size; proc++) { 4907 if (!len_s[proc]) continue; 4908 i = owners[proc]; 4909 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4910 k++; 4911 } 4912 4913 /* 
receives and sends of j-structure are complete */ 4914 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4915 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4916 4917 /* send and recv i-structure */ 4918 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4919 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4920 4921 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4922 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4923 for (proc = 0, k = 0; proc < size; proc++) { 4924 if (!len_s[proc]) continue; 4925 /* form outgoing message for i-structure: 4926 buf_si[0]: nrows to be sent 4927 [1:nrows]: row index (global) 4928 [nrows+1:2*nrows+1]: i-structure index 4929 */ 4930 nrows = len_si[proc] / 2 - 1; 4931 buf_si_i = buf_si + nrows + 1; 4932 buf_si[0] = nrows; 4933 buf_si_i[0] = 0; 4934 nrows = 0; 4935 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4936 anzi = ai[i + 1] - ai[i]; 4937 if (anzi) { 4938 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4939 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4940 nrows++; 4941 } 4942 } 4943 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4944 k++; 4945 buf_si += len_si[proc]; 4946 } 4947 4948 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4949 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4950 4951 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4952 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4953 4954 PetscCall(PetscFree(len_si)); 4955 PetscCall(PetscFree(len_ri)); 4956 PetscCall(PetscFree(rj_waits)); 4957 PetscCall(PetscFree2(si_waits, sj_waits)); 4958 PetscCall(PetscFree(ri_waits)); 4959 PetscCall(PetscFree(buf_s)); 4960 
PetscCall(PetscFree(status)); 4961 4962 /* compute a local seq matrix in each processor */ 4963 /* allocate bi array and free space for accumulating nonzero column info */ 4964 PetscCall(PetscMalloc1(m + 1, &bi)); 4965 bi[0] = 0; 4966 4967 /* create and initialize a linked list */ 4968 nlnk = N + 1; 4969 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4970 4971 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4972 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4973 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4974 4975 current_space = free_space; 4976 4977 /* determine symbolic info for each local row */ 4978 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4979 4980 for (k = 0; k < merge->nrecv; k++) { 4981 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4982 nrows = *buf_ri_k[k]; 4983 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4984 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4985 } 4986 4987 MatPreallocateBegin(comm, m, n, dnz, onz); 4988 len = 0; 4989 for (i = 0; i < m; i++) { 4990 bnzi = 0; 4991 /* add local non-zero cols of this proc's seqmat into lnk */ 4992 arow = owners[rank] + i; 4993 anzi = ai[arow + 1] - ai[arow]; 4994 aj = a->j + ai[arow]; 4995 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4996 bnzi += nlnk; 4997 /* add received col data into lnk */ 4998 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4999 if (i == *nextrow[k]) { /* i-th row */ 5000 anzi = *(nextai[k] + 1) - *nextai[k]; 5001 aj = buf_rj[k] + *nextai[k]; 5002 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5003 bnzi += nlnk; 5004 nextrow[k]++; 5005 nextai[k]++; 5006 } 5007 } 5008 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5009 5010 /* if free space is not available, make more free space */ 5011 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5012 /* copy data into free space, then initialize lnk */ 5013 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5014 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5015 5016 current_space->array += bnzi; 5017 current_space->local_used += bnzi; 5018 current_space->local_remaining -= bnzi; 5019 5020 bi[i + 1] = bi[i] + bnzi; 5021 } 5022 5023 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5024 5025 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5026 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5027 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5028 5029 /* create symbolic parallel matrix B_mpi */ 5030 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5031 PetscCall(MatCreate(comm, &B_mpi)); 5032 if (n == PETSC_DECIDE) { 5033 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5034 } else { 5035 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5036 } 5037 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5038 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5039 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5040 MatPreallocateEnd(dnz, onz); 5041 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5042 5043 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5044 B_mpi->assembled = PETSC_FALSE; 5045 merge->bi = bi; 5046 merge->bj = bj; 5047 merge->buf_ri = buf_ri; 5048 merge->buf_rj = buf_rj; 5049 merge->coi = NULL; 5050 merge->coj = NULL; 5051 merge->owners_co = NULL; 5052 5053 PetscCall(PetscCommDestroy(&comm)); 5054 5055 /* attach the supporting struct to B_mpi for reuse */ 5056 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5057 PetscCall(PetscContainerSetPointer(container, merge)); 5058 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5059 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5060 PetscCall(PetscContainerDestroy(&container)); 5061 *mpimat = B_mpi; 5062 5063 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5064 PetscFunctionReturn(PETSC_SUCCESS); 5065 } 5066 5067 /*@C 5068 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5069 matrices from each processor 5070 5071 Collective 5072 5073 Input Parameters: 5074 + comm - the communicators the parallel matrix will live on 5075 . seqmat - the input sequential matrices 5076 . m - number of local rows (or `PETSC_DECIDE`) 5077 . n - number of local columns (or `PETSC_DECIDE`) 5078 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5079 5080 Output Parameter: 5081 . mpimat - the parallel matrix generated 5082 5083 Level: advanced 5084 5085 Note: 5086 The dimensions of the sequential matrix in each processor MUST be the same. 5087 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5088 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5089 5090 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5091 @*/ 5092 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5093 { 5094 PetscMPIInt size; 5095 5096 PetscFunctionBegin; 5097 PetscCallMPI(MPI_Comm_size(comm, &size)); 5098 if (size == 1) { 5099 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5100 if (scall == MAT_INITIAL_MATRIX) { 5101 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5102 } else { 5103 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5104 } 5105 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5106 PetscFunctionReturn(PETSC_SUCCESS); 5107 } 5108 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5109 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5110 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5111 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5112 PetscFunctionReturn(PETSC_SUCCESS); 5113 } 5114 5115 /*@ 5116 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5117 5118 Not Collective 5119 5120 Input Parameter: 5121 . A - the matrix 5122 5123 Output Parameter: 5124 . A_loc - the local sequential matrix generated 5125 5126 Level: developer 5127 5128 Notes: 5129 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5130 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5131 `n` is the global column count obtained with `MatGetSize()` 5132 5133 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5134 5135 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5136 5137 Destroy the matrix with `MatDestroy()` 5138 5139 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5140 @*/ 5141 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5142 { 5143 PetscBool mpi; 5144 5145 PetscFunctionBegin; 5146 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5147 if (mpi) { 5148 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5149 } else { 5150 *A_loc = A; 5151 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5152 } 5153 PetscFunctionReturn(PETSC_SUCCESS); 5154 } 5155 5156 /*@ 5157 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5158 5159 Not Collective 5160 5161 Input Parameters: 5162 + A - the matrix 5163 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5164 5165 Output Parameter: 5166 . A_loc - the local sequential matrix generated 5167 5168 Level: developer 5169 5170 Notes: 5171 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5172 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5173 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5174 5175 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5176 5177 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5178 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5179 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5180 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
5181 5182 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5183 @*/ 5184 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5185 { 5186 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5187 Mat_SeqAIJ *mat, *a, *b; 5188 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5189 const PetscScalar *aa, *ba, *aav, *bav; 5190 PetscScalar *ca, *cam; 5191 PetscMPIInt size; 5192 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5193 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5194 PetscBool match; 5195 5196 PetscFunctionBegin; 5197 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5198 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5199 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5200 if (size == 1) { 5201 if (scall == MAT_INITIAL_MATRIX) { 5202 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5203 *A_loc = mpimat->A; 5204 } else if (scall == MAT_REUSE_MATRIX) { 5205 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5206 } 5207 PetscFunctionReturn(PETSC_SUCCESS); 5208 } 5209 5210 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5211 a = (Mat_SeqAIJ *)mpimat->A->data; 5212 b = (Mat_SeqAIJ *)mpimat->B->data; 5213 ai = a->i; 5214 aj = a->j; 5215 bi = b->i; 5216 bj = b->j; 5217 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5218 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5219 aa = aav; 5220 ba = bav; 5221 if (scall == MAT_INITIAL_MATRIX) { 5222 PetscCall(PetscMalloc1(1 + am, &ci)); 5223 ci[0] = 0; 5224 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5225 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5226 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5227 k = 0; 5228 for (i = 0; i < am; i++) { 5229 ncols_o = bi[i + 1] - bi[i]; 5230 ncols_d = ai[i + 1] - ai[i]; 5231 /* off-diagonal 
portion of A */ 5232 for (jo = 0; jo < ncols_o; jo++) { 5233 col = cmap[*bj]; 5234 if (col >= cstart) break; 5235 cj[k] = col; 5236 bj++; 5237 ca[k++] = *ba++; 5238 } 5239 /* diagonal portion of A */ 5240 for (j = 0; j < ncols_d; j++) { 5241 cj[k] = cstart + *aj++; 5242 ca[k++] = *aa++; 5243 } 5244 /* off-diagonal portion of A */ 5245 for (j = jo; j < ncols_o; j++) { 5246 cj[k] = cmap[*bj++]; 5247 ca[k++] = *ba++; 5248 } 5249 } 5250 /* put together the new matrix */ 5251 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5252 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5253 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5254 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5255 mat->free_a = PETSC_TRUE; 5256 mat->free_ij = PETSC_TRUE; 5257 mat->nonew = 0; 5258 } else if (scall == MAT_REUSE_MATRIX) { 5259 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5260 ci = mat->i; 5261 cj = mat->j; 5262 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5263 for (i = 0; i < am; i++) { 5264 /* off-diagonal portion of A */ 5265 ncols_o = bi[i + 1] - bi[i]; 5266 for (jo = 0; jo < ncols_o; jo++) { 5267 col = cmap[*bj]; 5268 if (col >= cstart) break; 5269 *cam++ = *ba++; 5270 bj++; 5271 } 5272 /* diagonal portion of A */ 5273 ncols_d = ai[i + 1] - ai[i]; 5274 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5275 /* off-diagonal portion of A */ 5276 for (j = jo; j < ncols_o; j++) { 5277 *cam++ = *ba++; 5278 bj++; 5279 } 5280 } 5281 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5282 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5283 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5284 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5285 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5286 PetscFunctionReturn(PETSC_SUCCESS); 5287 } 5288 5289 /*@ 5290 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by 
taking all its local rows and putting them into a sequential matrix with 5291 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5292 5293 Not Collective 5294 5295 Input Parameters: 5296 + A - the matrix 5297 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5298 5299 Output Parameters: 5300 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5301 - A_loc - the local sequential matrix generated 5302 5303 Level: developer 5304 5305 Note: 5306 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5307 part, then those associated with the off-diagonal part (in its local ordering) 5308 5309 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5310 @*/ 5311 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5312 { 5313 Mat Ao, Ad; 5314 const PetscInt *cmap; 5315 PetscMPIInt size; 5316 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5317 5318 PetscFunctionBegin; 5319 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5320 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5321 if (size == 1) { 5322 if (scall == MAT_INITIAL_MATRIX) { 5323 PetscCall(PetscObjectReference((PetscObject)Ad)); 5324 *A_loc = Ad; 5325 } else if (scall == MAT_REUSE_MATRIX) { 5326 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5327 } 5328 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5329 PetscFunctionReturn(PETSC_SUCCESS); 5330 } 5331 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5332 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5333 if (f) { 5334 PetscCall((*f)(A, scall, glob, A_loc)); 5335 } else { 5336 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5337 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5338 Mat_SeqAIJ *c; 5339 PetscInt *ai = a->i, *aj = a->j; 5340 PetscInt *bi = b->i, *bj = b->j; 5341 PetscInt *ci, *cj; 5342 const PetscScalar *aa, *ba; 5343 PetscScalar *ca; 5344 PetscInt i, j, am, dn, on; 5345 5346 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5347 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5348 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5349 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5350 if (scall == MAT_INITIAL_MATRIX) { 5351 PetscInt k; 5352 PetscCall(PetscMalloc1(1 + am, &ci)); 5353 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5354 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5355 ci[0] = 0; 5356 for (i = 0, k = 0; i < am; i++) { 5357 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5358 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5359 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5360 /* diagonal portion of A */ 5361 for (j = 0; j < ncols_d; j++, k++) { 5362 cj[k] = *aj++; 5363 ca[k] = *aa++; 5364 } 5365 /* off-diagonal portion of A */ 5366 for (j = 0; j < ncols_o; j++, k++) { 5367 cj[k] = dn + *bj++; 5368 ca[k] = *ba++; 5369 } 5370 } 5371 /* put together the new matrix */ 5372 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5373 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5374 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5375 c = (Mat_SeqAIJ *)(*A_loc)->data; 5376 c->free_a = PETSC_TRUE; 5377 c->free_ij = PETSC_TRUE; 5378 c->nonew = 0; 5379 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5380 } else if (scall == MAT_REUSE_MATRIX) { 5381 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5382 for (i = 0; i < am; i++) { 5383 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5384 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5385 /* diagonal portion of A */ 5386 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5387 /* off-diagonal portion of A */ 5388 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5389 } 5390 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5391 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5392 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5393 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5394 if (glob) { 5395 PetscInt cst, *gidx; 5396 5397 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5398 PetscCall(PetscMalloc1(dn + on, &gidx)); 5399 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5400 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5401 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5402 } 5403 } 5404 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5405 PetscFunctionReturn(PETSC_SUCCESS); 5406 } 5407 5408 /*@C 5409 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5410 5411 Not Collective 5412 5413 Input Parameters: 5414 + A - the matrix 5415 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5416 . row - index set of rows to extract (or `NULL`) 5417 - col - index set of columns to extract (or `NULL`) 5418 5419 Output Parameter: 5420 . 
A_loc - the local sequential matrix generated 5421 5422 Level: developer 5423 5424 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5425 @*/ 5426 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5427 { 5428 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5429 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5430 IS isrowa, iscola; 5431 Mat *aloc; 5432 PetscBool match; 5433 5434 PetscFunctionBegin; 5435 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5436 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5437 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5438 if (!row) { 5439 start = A->rmap->rstart; 5440 end = A->rmap->rend; 5441 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5442 } else { 5443 isrowa = *row; 5444 } 5445 if (!col) { 5446 start = A->cmap->rstart; 5447 cmap = a->garray; 5448 nzA = a->A->cmap->n; 5449 nzB = a->B->cmap->n; 5450 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5451 ncols = 0; 5452 for (i = 0; i < nzB; i++) { 5453 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5454 else break; 5455 } 5456 imark = i; 5457 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5458 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5459 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5460 } else { 5461 iscola = *col; 5462 } 5463 if (scall != MAT_INITIAL_MATRIX) { 5464 PetscCall(PetscMalloc1(1, &aloc)); 5465 aloc[0] = *A_loc; 5466 } 5467 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5468 if (!col) { /* attach global id of condensed columns */ 5469 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5470 } 5471 *A_loc = aloc[0]; 5472 PetscCall(PetscFree(aloc)); 5473 if (!row) PetscCall(ISDestroy(&isrowa)); 5474 if (!col) 
PetscCall(ISDestroy(&iscola)); 5475 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5476 PetscFunctionReturn(PETSC_SUCCESS); 5477 } 5478 5479 /* 5480 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5481 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5482 * on a global size. 5483 * */ 5484 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5485 { 5486 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5487 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5488 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5489 PetscMPIInt owner; 5490 PetscSFNode *iremote, *oiremote; 5491 const PetscInt *lrowindices; 5492 PetscSF sf, osf; 5493 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5494 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5495 MPI_Comm comm; 5496 ISLocalToGlobalMapping mapping; 5497 const PetscScalar *pd_a, *po_a; 5498 5499 PetscFunctionBegin; 5500 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5501 /* plocalsize is the number of roots 5502 * nrows is the number of leaves 5503 * */ 5504 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5505 PetscCall(ISGetLocalSize(rows, &nrows)); 5506 PetscCall(PetscCalloc1(nrows, &iremote)); 5507 PetscCall(ISGetIndices(rows, &lrowindices)); 5508 for (i = 0; i < nrows; i++) { 5509 /* Find a remote index and an owner for a row 5510 * The row could be local or remote 5511 * */ 5512 owner = 0; 5513 lidx = 0; 5514 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5515 iremote[i].index = lidx; 5516 iremote[i].rank = owner; 5517 } 5518 /* Create SF to communicate how many nonzero columns for each row */ 5519 PetscCall(PetscSFCreate(comm, &sf)); 5520 /* SF will figure out the number of nonzero columns for each row, and their 5521 * offsets 5522 * */ 5523 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5524 PetscCall(PetscSFSetFromOptions(sf)); 5525 PetscCall(PetscSFSetUp(sf)); 5526 5527 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5528 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5529 PetscCall(PetscCalloc1(nrows, &pnnz)); 5530 roffsets[0] = 0; 5531 roffsets[1] = 0; 5532 for (i = 0; i < plocalsize; i++) { 5533 /* diagonal */ 5534 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5535 /* off-diagonal */ 5536 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5537 /* compute offsets so that we relative location for each row */ 5538 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5539 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5540 } 5541 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5542 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5543 /* 'r' means root, and 'l' means leaf */ 5544 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5545 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5546 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5547 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5548 PetscCall(PetscSFDestroy(&sf)); 5549 PetscCall(PetscFree(roffsets)); 5550 PetscCall(PetscFree(nrcols)); 5551 dntotalcols = 0; 5552 ontotalcols = 0; 5553 ncol = 0; 5554 for (i = 0; i < nrows; i++) { 5555 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5556 ncol = PetscMax(pnnz[i], ncol); 5557 /* diagonal */ 5558 dntotalcols += nlcols[i * 2 + 0]; 5559 /* off-diagonal */ 5560 ontotalcols += nlcols[i * 2 + 1]; 5561 } 5562 /* We do not need to figure the right number of columns 5563 * since all the calculations will be done by going through the raw data 5564 * */ 5565 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5566 PetscCall(MatSetUp(*P_oth)); 5567 PetscCall(PetscFree(pnnz)); 5568 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5569 /* diagonal */ 5570 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5571 /* off-diagonal */ 5572 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5573 /* diagonal */ 5574 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5575 /* off-diagonal */ 5576 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5577 dntotalcols = 0; 5578 ontotalcols = 0; 5579 ntotalcols = 0; 5580 for (i = 0; i < nrows; i++) { 5581 owner = 0; 5582 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5583 /* Set iremote for diag matrix */ 5584 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5585 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5586 iremote[dntotalcols].rank = owner; 5587 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5588 ilocal[dntotalcols++] = ntotalcols++; 5589 } 5590 /* off-diagonal */ 5591 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5592 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5593 oiremote[ontotalcols].rank = owner; 5594 oilocal[ontotalcols++] = ntotalcols++; 5595 } 5596 } 5597 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5598 PetscCall(PetscFree(loffsets)); 5599 PetscCall(PetscFree(nlcols)); 5600 PetscCall(PetscSFCreate(comm, &sf)); 5601 /* P serves as roots and P_oth is leaves 5602 * Diag matrix 5603 * */ 5604 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5605 PetscCall(PetscSFSetFromOptions(sf)); 5606 PetscCall(PetscSFSetUp(sf)); 5607 5608 PetscCall(PetscSFCreate(comm, &osf)); 5609 /* off-diagonal */ 5610 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5611 PetscCall(PetscSFSetFromOptions(osf)); 5612 PetscCall(PetscSFSetUp(osf)); 5613 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5614 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5615 /* operate on the matrix internal data to save memory */ 5616 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5617 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5618 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5619 /* Convert to global indices for diag matrix */ 5620 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5621 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5622 /* We want P_oth store global indices */ 5623 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5624 /* Use memory scalable approach */ 5625 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5626 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5627 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5628 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5629 /* Convert back to local indices */ 5630 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5631 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5632 nout = 0; 5633 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5634 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5635 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5636 /* Exchange values */ 5637 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5638 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5639 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5640 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5641 /* Stop PETSc from shrinking memory */ 5642 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5643 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5644 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY)); 5645 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5646 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5647 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5648 PetscCall(PetscSFDestroy(&sf)); 5649 PetscCall(PetscSFDestroy(&osf)); 5650 PetscFunctionReturn(PETSC_SUCCESS); 5651 } 5652 5653 /* 5654 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5655 * This supports MPIAIJ and MAIJ 5656 * */ 5657 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5658 { 5659 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5660 Mat_SeqAIJ *p_oth; 5661 IS rows, map; 5662 PetscHMapI hamp; 5663 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5664 MPI_Comm comm; 5665 PetscSF sf, osf; 5666 PetscBool has; 5667 5668 PetscFunctionBegin; 5669 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5670 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5671 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5672 * and then create a submatrix (that often is an overlapping matrix) 5673 * */ 5674 if (reuse == MAT_INITIAL_MATRIX) { 5675 /* Use a hash table to figure out unique keys */ 5676 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5677 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5678 count = 0; 5679 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5680 for (i = 0; i < a->B->cmap->n; i++) { 5681 key = a->garray[i] / dof; 5682 PetscCall(PetscHMapIHas(hamp, key, &has)); 5683 if (!has) { 5684 mapping[i] = count; 5685 PetscCall(PetscHMapISet(hamp, key, count++)); 5686 } else { 5687 /* Current 'i' has the same value the previous step */ 5688 mapping[i] = count - 1; 5689 } 5690 } 5691 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5692 
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5693 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5694 PetscCall(PetscCalloc1(htsize, &rowindices)); 5695 off = 0; 5696 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5697 PetscCall(PetscHMapIDestroy(&hamp)); 5698 PetscCall(PetscSortInt(htsize, rowindices)); 5699 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5700 /* In case, the matrix was already created but users want to recreate the matrix */ 5701 PetscCall(MatDestroy(P_oth)); 5702 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5703 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5704 PetscCall(ISDestroy(&map)); 5705 PetscCall(ISDestroy(&rows)); 5706 } else if (reuse == MAT_REUSE_MATRIX) { 5707 /* If matrix was already created, we simply update values using SF objects 5708 * that as attached to the matrix earlier. 
5709 */ 5710 const PetscScalar *pd_a, *po_a; 5711 5712 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5713 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5714 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5715 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5716 /* Update values in place */ 5717 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5718 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5719 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5720 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5721 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5722 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5723 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5724 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5725 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5726 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5727 PetscFunctionReturn(PETSC_SUCCESS); 5728 } 5729 5730 /*@C 5731 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5732 5733 Collective 5734 5735 Input Parameters: 5736 + A - the first matrix in `MATMPIAIJ` format 5737 . B - the second matrix in `MATMPIAIJ` format 5738 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5739 5740 Output Parameters: 5741 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5742 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5743 - B_seq - the sequential matrix generated 5744 5745 Level: developer 5746 5747 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5748 @*/ 5749 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5750 { 5751 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5752 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5753 IS isrowb, iscolb; 5754 Mat *bseq = NULL; 5755 5756 PetscFunctionBegin; 5757 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5758 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5759 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5760 5761 if (scall == MAT_INITIAL_MATRIX) { 5762 start = A->cmap->rstart; 5763 cmap = a->garray; 5764 nzA = a->A->cmap->n; 5765 nzB = a->B->cmap->n; 5766 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5767 ncols = 0; 5768 for (i = 0; i < nzB; i++) { /* row < local row index */ 5769 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5770 else break; 5771 } 5772 imark = i; 5773 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5774 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5775 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5776 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5777 } else { 5778 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5779 isrowb = *rowb; 5780 iscolb = *colb; 5781 PetscCall(PetscMalloc1(1, &bseq)); 5782 bseq[0] = *B_seq; 5783 } 5784 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5785 *B_seq = bseq[0]; 5786 PetscCall(PetscFree(bseq)); 5787 if (!rowb) { 5788 
PetscCall(ISDestroy(&isrowb)); 5789 } else { 5790 *rowb = isrowb; 5791 } 5792 if (!colb) { 5793 PetscCall(ISDestroy(&iscolb)); 5794 } else { 5795 *colb = iscolb; 5796 } 5797 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5798 PetscFunctionReturn(PETSC_SUCCESS); 5799 } 5800 5801 /* 5802 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5803 of the OFF-DIAGONAL portion of local A 5804 5805 Collective 5806 5807 Input Parameters: 5808 + A,B - the matrices in `MATMPIAIJ` format 5809 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5810 5811 Output Parameter: 5812 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5813 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5814 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5815 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5816 5817 Developer Note: 5818 This directly accesses information inside the VecScatter associated with the matrix-vector product 5819 for this matrix. This is not desirable.. 
5820 5821 Level: developer 5822 5823 */ 5824 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5825 { 5826 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5827 Mat_SeqAIJ *b_oth; 5828 VecScatter ctx; 5829 MPI_Comm comm; 5830 const PetscMPIInt *rprocs, *sprocs; 5831 const PetscInt *srow, *rstarts, *sstarts; 5832 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5833 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5834 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5835 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5836 PetscMPIInt size, tag, rank, nreqs; 5837 5838 PetscFunctionBegin; 5839 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5840 PetscCallMPI(MPI_Comm_size(comm, &size)); 5841 5842 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5843 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5844 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5845 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5846 5847 if (size == 1) { 5848 startsj_s = NULL; 5849 bufa_ptr = NULL; 5850 *B_oth = NULL; 5851 PetscFunctionReturn(PETSC_SUCCESS); 5852 } 5853 5854 ctx = a->Mvctx; 5855 tag = ((PetscObject)ctx)->tag; 5856 5857 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5858 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5859 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5860 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5861 PetscCall(PetscMalloc1(nreqs, &reqs)); 5862 rwaits = reqs; 5863 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5864 5865 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5866 if (scall == MAT_INITIAL_MATRIX) { 5867 /* i-array */ 5868 /* post receives */ 5869 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5870 for (i = 0; i < nrecvs; i++) { 5871 rowlen = rvalues + rstarts[i] * rbs; 5872 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5873 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5874 } 5875 5876 /* pack the outgoing message */ 5877 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5878 5879 sstartsj[0] = 0; 5880 rstartsj[0] = 0; 5881 len = 0; /* total length of j or a array to be sent */ 5882 if (nsends) { 5883 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5884 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5885 } 5886 for (i = 0; i < nsends; i++) { 5887 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5888 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5889 for (j = 0; j < nrows; j++) { 5890 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5891 for (l = 0; l < sbs; l++) { 5892 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5893 5894 rowlen[j * sbs + l] = ncols; 5895 5896 len += ncols; 5897 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5898 } 5899 k++; 5900 } 5901 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5902 5903 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5904 } 5905 /* recvs and sends of i-array are completed */ 5906 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5907 
PetscCall(PetscFree(svalues)); 5908 5909 /* allocate buffers for sending j and a arrays */ 5910 PetscCall(PetscMalloc1(len + 1, &bufj)); 5911 PetscCall(PetscMalloc1(len + 1, &bufa)); 5912 5913 /* create i-array of B_oth */ 5914 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5915 5916 b_othi[0] = 0; 5917 len = 0; /* total length of j or a array to be received */ 5918 k = 0; 5919 for (i = 0; i < nrecvs; i++) { 5920 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5921 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5922 for (j = 0; j < nrows; j++) { 5923 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5924 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5925 k++; 5926 } 5927 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5928 } 5929 PetscCall(PetscFree(rvalues)); 5930 5931 /* allocate space for j and a arrays of B_oth */ 5932 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5933 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5934 5935 /* j-array */ 5936 /* post receives of j-array */ 5937 for (i = 0; i < nrecvs; i++) { 5938 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5939 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5940 } 5941 5942 /* pack the outgoing message j-array */ 5943 if (nsends) k = sstarts[0]; 5944 for (i = 0; i < nsends; i++) { 5945 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5946 bufJ = bufj + sstartsj[i]; 5947 for (j = 0; j < nrows; j++) { 5948 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5949 for (ll = 0; ll < sbs; ll++) { 5950 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5951 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5952 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5953 } 5954 } 5955 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5956 } 5957 5958 /* recvs 
and sends of j-array are completed */ 5959 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5960 } else if (scall == MAT_REUSE_MATRIX) { 5961 sstartsj = *startsj_s; 5962 rstartsj = *startsj_r; 5963 bufa = *bufa_ptr; 5964 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5965 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5966 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5967 5968 /* a-array */ 5969 /* post receives of a-array */ 5970 for (i = 0; i < nrecvs; i++) { 5971 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5972 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5973 } 5974 5975 /* pack the outgoing message a-array */ 5976 if (nsends) k = sstarts[0]; 5977 for (i = 0; i < nsends; i++) { 5978 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5979 bufA = bufa + sstartsj[i]; 5980 for (j = 0; j < nrows; j++) { 5981 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5982 for (ll = 0; ll < sbs; ll++) { 5983 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5984 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5985 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5986 } 5987 } 5988 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5989 } 5990 /* recvs and sends of a-array are completed */ 5991 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5992 PetscCall(PetscFree(reqs)); 5993 5994 if (scall == MAT_INITIAL_MATRIX) { 5995 /* put together the new matrix */ 5996 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5997 5998 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5999 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6000 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6001 b_oth->free_a = PETSC_TRUE; 6002 b_oth->free_ij = PETSC_TRUE; 6003 b_oth->nonew = 0; 6004 6005 PetscCall(PetscFree(bufj)); 6006 if (!startsj_s || !bufa_ptr) { 6007 PetscCall(PetscFree2(sstartsj, rstartsj)); 6008 PetscCall(PetscFree(bufa_ptr)); 6009 } else { 6010 *startsj_s = sstartsj; 6011 *startsj_r = rstartsj; 6012 *bufa_ptr = bufa; 6013 } 6014 } else if (scall == MAT_REUSE_MATRIX) { 6015 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6016 } 6017 6018 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6019 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6020 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6021 PetscFunctionReturn(PETSC_SUCCESS); 6022 } 6023 6024 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6027 #if defined(PETSC_HAVE_MKL_SPARSE) 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6029 #endif 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6032 #if defined(PETSC_HAVE_ELEMENTAL) 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6034 #endif 6035 #if defined(PETSC_HAVE_SCALAPACK) 6036 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6037 #endif 6038 #if defined(PETSC_HAVE_HYPRE) 6039 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6040 #endif 6041 #if defined(PETSC_HAVE_CUDA) 6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6043 #endif 6044 #if defined(PETSC_HAVE_HIP) 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6046 #endif 6047 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6049 #endif 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6051 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6052 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6053 6054 /* 6055 Computes (B'*A')' since computing B*A directly is untenable 6056 6057 n p p 6058 [ ] [ ] [ ] 6059 m [ A ] * n [ B ] = m [ C ] 6060 [ ] [ ] [ ] 6061 6062 */ 6063 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6064 { 6065 Mat At, Bt, Ct; 6066 6067 PetscFunctionBegin; 6068 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6069 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6070 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6071 PetscCall(MatDestroy(&At)); 6072 PetscCall(MatDestroy(&Bt)); 6073 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6074 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6075 PetscCall(MatDestroy(&Ct)); 6076 PetscFunctionReturn(PETSC_SUCCESS); 6077 } 6078 6079 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6080 { 6081 PetscBool cisdense; 6082 6083 PetscFunctionBegin; 6084 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6085 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6086 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6087 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6088 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6089 
PetscCall(MatSetUp(C)); 6090 6091 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6092 PetscFunctionReturn(PETSC_SUCCESS); 6093 } 6094 6095 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6096 { 6097 Mat_Product *product = C->product; 6098 Mat A = product->A, B = product->B; 6099 6100 PetscFunctionBegin; 6101 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6102 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6103 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6104 C->ops->productsymbolic = MatProductSymbolic_AB; 6105 PetscFunctionReturn(PETSC_SUCCESS); 6106 } 6107 6108 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6109 { 6110 Mat_Product *product = C->product; 6111 6112 PetscFunctionBegin; 6113 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6114 PetscFunctionReturn(PETSC_SUCCESS); 6115 } 6116 6117 /* 6118 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6119 6120 Input Parameters: 6121 6122 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6123 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6124 6125 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6126 6127 For Set1, j1[] contains column indices of the nonzeros. 6128 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6129 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6130 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6131 6132 Similar for Set2. 
6133 6134 This routine merges the two sets of nonzeros row by row and removes repeats. 6135 6136 Output Parameters: (memory is allocated by the caller) 6137 6138 i[],j[]: the CSR of the merged matrix, which has m rows. 6139 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6140 imap2[]: similar to imap1[], but for Set2. 6141 Note we order nonzeros row-by-row and from left to right. 6142 */ 6143 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6144 { 6145 PetscInt r, m; /* Row index of mat */ 6146 PetscCount t, t1, t2, b1, e1, b2, e2; 6147 6148 PetscFunctionBegin; 6149 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6150 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6151 i[0] = 0; 6152 for (r = 0; r < m; r++) { /* Do row by row merging */ 6153 b1 = rowBegin1[r]; 6154 e1 = rowEnd1[r]; 6155 b2 = rowBegin2[r]; 6156 e2 = rowEnd2[r]; 6157 while (b1 < e1 && b2 < e2) { 6158 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6159 j[t] = j1[b1]; 6160 imap1[t1] = t; 6161 imap2[t2] = t; 6162 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6163 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6164 t1++; 6165 t2++; 6166 t++; 6167 } else if (j1[b1] < j2[b2]) { 6168 j[t] = j1[b1]; 6169 imap1[t1] = t; 6170 b1 += jmap1[t1 + 1] - jmap1[t1]; 6171 t1++; 6172 t++; 6173 } else { 6174 j[t] = j2[b2]; 6175 imap2[t2] = t; 6176 b2 += jmap2[t2 + 1] - jmap2[t2]; 6177 t2++; 6178 t++; 6179 } 6180 } 6181 /* Merge the remaining in either j1[] or j2[] */ 6182 while (b1 < e1) { 6183 j[t] = j1[b1]; 6184 imap1[t1] = t; 6185 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6186 t1++; 6187 t++; 6188 } 6189 while (b2 < e2) { 6190 j[t] = j2[b2]; 6191 imap2[t2] = t; 6192 b2 += jmap2[t2 + 1] - jmap2[t2]; 6193 t2++; 6194 t++; 6195 } 6196 i[r + 1] = t; 6197 } 6198 PetscFunctionReturn(PETSC_SUCCESS); 6199 } 6200 6201 /* 6202 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6203 6204 Input Parameters: 6205 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6206 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6207 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6208 6209 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6210 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6211 6212 Output Parameters: 6213 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6214 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6215 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6216 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6217 6218 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6219 Atot: number of entries belonging to the diagonal block. 6220 Annz: number of unique nonzeros belonging to the diagonal block. 6221 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6222 repeats (i.e., same 'i,j' pair). 6223 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6224 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6225 6226 Atot: number of entries belonging to the diagonal block 6227 Annz: number of unique nonzeros belonging to the diagonal block. 6228 6229 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6230 6231 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6232 */ 6233 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6234 { 6235 PetscInt cstart, cend, rstart, rend, row, col; 6236 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6237 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6238 PetscCount k, m, p, q, r, s, mid; 6239 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6240 6241 PetscFunctionBegin; 6242 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6243 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6244 m = rend - rstart; 6245 6246 /* Skip negative rows */ 6247 for (k = 0; k < n; k++) 6248 if (i[k] >= 0) break; 6249 6250 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6251 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6252 */ 6253 while (k < n) { 6254 row = i[k]; 6255 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6256 for (s = k; s < n; s++) 6257 if (i[s] != row) break; 6258 6259 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6260 for (p = k; p < s; p++) { 6261 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6262 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6263 } 6264 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6265 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6266 rowBegin[row - rstart] = k; 6267 rowMid[row - rstart] = mid; 6268 rowEnd[row - rstart] = s; 6269 6270 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6271 Atot += mid - k; 6272 Btot += s - mid; 6273 6274 /* Count unique nonzeros of this diag row */ 6275 for (p = k; p < mid;) { 6276 col = j[p]; 6277 do { 6278 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6279 p++; 6280 } while (p < mid && j[p] == col); 6281 Annz++; 6282 } 6283 6284 /* Count unique nonzeros of this offdiag row */ 6285 for (p = mid; p < s;) { 6286 col = j[p]; 6287 do { 6288 p++; 6289 } while (p < s && j[p] == col); 6290 Bnnz++; 6291 } 6292 k = s; 6293 } 6294 6295 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6296 PetscCall(PetscMalloc1(Atot, &Aperm)); 6297 PetscCall(PetscMalloc1(Btot, &Bperm)); 6298 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6299 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6300 6301 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6302 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6303 for (r = 0; r < m; r++) { 6304 k = rowBegin[r]; 6305 mid = rowMid[r]; 6306 s = rowEnd[r]; 6307 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6308 
PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6309 Atot += mid - k; 6310 Btot += s - mid; 6311 6312 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6313 for (p = k; p < mid;) { 6314 col = j[p]; 6315 q = p; 6316 do { 6317 p++; 6318 } while (p < mid && j[p] == col); 6319 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6320 Annz++; 6321 } 6322 6323 for (p = mid; p < s;) { 6324 col = j[p]; 6325 q = p; 6326 do { 6327 p++; 6328 } while (p < s && j[p] == col); 6329 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6330 Bnnz++; 6331 } 6332 } 6333 /* Output */ 6334 *Aperm_ = Aperm; 6335 *Annz_ = Annz; 6336 *Atot_ = Atot; 6337 *Ajmap_ = Ajmap; 6338 *Bperm_ = Bperm; 6339 *Bnnz_ = Bnnz; 6340 *Btot_ = Btot; 6341 *Bjmap_ = Bjmap; 6342 PetscFunctionReturn(PETSC_SUCCESS); 6343 } 6344 6345 /* 6346 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6347 6348 Input Parameters: 6349 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6350 nnz: number of unique nonzeros in the merged matrix 6351 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6352 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6353 6354 Output Parameter: (memory is allocated by the caller) 6355 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6356 6357 Example: 6358 nnz1 = 4 6359 nnz = 6 6360 imap = [1,3,4,5] 6361 jmap = [0,3,5,6,7] 6362 then, 6363 jmap_new = [0,0,3,3,5,6,7] 6364 */ 6365 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6366 { 6367 PetscCount k, p; 6368 6369 PetscFunctionBegin; 6370 jmap_new[0] = 0; 6371 p = nnz; /* p loops over jmap_new[] backwards */ 6372 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6373 for (; p > 
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6374 } 6375 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6376 PetscFunctionReturn(PETSC_SUCCESS); 6377 } 6378 6379 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6380 { 6381 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6382 6383 PetscFunctionBegin; 6384 PetscCall(PetscSFDestroy(&coo->sf)); 6385 PetscCall(PetscFree(coo->Aperm1)); 6386 PetscCall(PetscFree(coo->Bperm1)); 6387 PetscCall(PetscFree(coo->Ajmap1)); 6388 PetscCall(PetscFree(coo->Bjmap1)); 6389 PetscCall(PetscFree(coo->Aimap2)); 6390 PetscCall(PetscFree(coo->Bimap2)); 6391 PetscCall(PetscFree(coo->Aperm2)); 6392 PetscCall(PetscFree(coo->Bperm2)); 6393 PetscCall(PetscFree(coo->Ajmap2)); 6394 PetscCall(PetscFree(coo->Bjmap2)); 6395 PetscCall(PetscFree(coo->Cperm1)); 6396 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6397 PetscCall(PetscFree(coo)); 6398 PetscFunctionReturn(PETSC_SUCCESS); 6399 } 6400 6401 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6402 { 6403 MPI_Comm comm; 6404 PetscMPIInt rank, size; 6405 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6406 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6407 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6408 PetscContainer container; 6409 MatCOOStruct_MPIAIJ *coo; 6410 6411 PetscFunctionBegin; 6412 PetscCall(PetscFree(mpiaij->garray)); 6413 PetscCall(VecDestroy(&mpiaij->lvec)); 6414 #if defined(PETSC_USE_CTABLE) 6415 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6416 #else 6417 PetscCall(PetscFree(mpiaij->colmap)); 6418 #endif 6419 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6420 mat->assembled = PETSC_FALSE; 6421 mat->was_assembled = PETSC_FALSE; 6422 6423 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6424 PetscCallMPI(MPI_Comm_size(comm, &size)); 6425 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6426 PetscCall(PetscLayoutSetUp(mat->rmap)); 
6427 PetscCall(PetscLayoutSetUp(mat->cmap)); 6428 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6429 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6430 PetscCall(MatGetLocalSize(mat, &m, &n)); 6431 PetscCall(MatGetSize(mat, &M, &N)); 6432 6433 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6434 /* entries come first, then local rows, then remote rows. */ 6435 PetscCount n1 = coo_n, *perm1; 6436 PetscInt *i1 = coo_i, *j1 = coo_j; 6437 6438 PetscCall(PetscMalloc1(n1, &perm1)); 6439 for (k = 0; k < n1; k++) perm1[k] = k; 6440 6441 /* Manipulate indices so that entries with negative row or col indices will have smallest 6442 row indices, local entries will have greater but negative row indices, and remote entries 6443 will have positive row indices. 6444 */ 6445 for (k = 0; k < n1; k++) { 6446 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6447 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6448 else { 6449 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6450 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6451 } 6452 } 6453 6454 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6455 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6456 6457 /* Advance k to the first entry we need to take care of */ 6458 for (k = 0; k < n1; k++) 6459 if (i1[k] > PETSC_MIN_INT) break; 6460 PetscInt i1start = k; 6461 6462 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6463 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6464 6465 /* Send remote rows 
to their owner */ 6466 /* Find which rows should be sent to which remote ranks*/ 6467 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6468 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6469 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6470 const PetscInt *ranges; 6471 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6472 6473 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6474 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6475 for (k = rem; k < n1;) { 6476 PetscMPIInt owner; 6477 PetscInt firstRow, lastRow; 6478 6479 /* Locate a row range */ 6480 firstRow = i1[k]; /* first row of this owner */ 6481 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6482 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6483 6484 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6485 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6486 6487 /* All entries in [k,p) belong to this remote owner */ 6488 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6489 PetscMPIInt *sendto2; 6490 PetscInt *nentries2; 6491 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6492 6493 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6494 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6495 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6496 PetscCall(PetscFree2(sendto, nentries2)); 6497 sendto = sendto2; 6498 nentries = nentries2; 6499 maxNsend = maxNsend2; 6500 } 6501 sendto[nsend] = owner; 6502 nentries[nsend] = p - k; 6503 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6504 nsend++; 6505 k = p; 6506 } 6507 6508 /* Build 1st SF to know offsets on remote to send data */ 6509 PetscSF sf1; 6510 PetscInt nroots = 1, nroots2 = 0; 6511 PetscInt nleaves = nsend, nleaves2 = 0; 6512 PetscInt *offsets; 6513 PetscSFNode *iremote; 6514 6515 PetscCall(PetscSFCreate(comm, &sf1)); 6516 PetscCall(PetscMalloc1(nsend, &iremote)); 6517 PetscCall(PetscMalloc1(nsend, &offsets)); 6518 for (k = 0; k < nsend; k++) { 6519 iremote[k].rank = sendto[k]; 6520 iremote[k].index = 0; 6521 nleaves2 += nentries[k]; 6522 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6523 } 6524 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6525 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6526 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6527 PetscCall(PetscSFDestroy(&sf1)); 6528 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6529 6530 /* Build 2nd SF to send remote COOs to their owner */ 6531 PetscSF sf2; 6532 nroots = nroots2; 6533 nleaves = nleaves2; 6534 PetscCall(PetscSFCreate(comm, &sf2)); 6535 
PetscCall(PetscSFSetFromOptions(sf2)); 6536 PetscCall(PetscMalloc1(nleaves, &iremote)); 6537 p = 0; 6538 for (k = 0; k < nsend; k++) { 6539 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6540 for (q = 0; q < nentries[k]; q++, p++) { 6541 iremote[p].rank = sendto[k]; 6542 iremote[p].index = offsets[k] + q; 6543 } 6544 } 6545 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6546 6547 /* Send the remote COOs to their owner */ 6548 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6549 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6550 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6551 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6552 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6553 PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */ 6554 PetscInt *j1prem = j1 ? 
j1 + rem : NULL; 6555 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6556 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6558 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6559 6560 PetscCall(PetscFree(offsets)); 6561 PetscCall(PetscFree2(sendto, nentries)); 6562 6563 /* Sort received COOs by row along with the permutation array */ 6564 for (k = 0; k < n2; k++) perm2[k] = k; 6565 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6566 6567 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6568 PetscCount *Cperm1; 6569 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6570 PetscCount *perm1prem = perm1 ? perm1 + rem : NULL; 6571 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6572 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6573 6574 /* Support for HYPRE matrices, kind of a hack. 
6575 Swap min column with diagonal so that diagonal values will go first */ 6576 PetscBool hypre; 6577 const char *name; 6578 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6579 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6580 if (hypre) { 6581 PetscInt *minj; 6582 PetscBT hasdiag; 6583 6584 PetscCall(PetscBTCreate(m, &hasdiag)); 6585 PetscCall(PetscMalloc1(m, &minj)); 6586 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6587 for (k = i1start; k < rem; k++) { 6588 if (j1[k] < cstart || j1[k] >= cend) continue; 6589 const PetscInt rindex = i1[k] - rstart; 6590 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6591 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6592 } 6593 for (k = 0; k < n2; k++) { 6594 if (j2[k] < cstart || j2[k] >= cend) continue; 6595 const PetscInt rindex = i2[k] - rstart; 6596 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6597 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6598 } 6599 for (k = i1start; k < rem; k++) { 6600 const PetscInt rindex = i1[k] - rstart; 6601 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6602 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6603 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6604 } 6605 for (k = 0; k < n2; k++) { 6606 const PetscInt rindex = i2[k] - rstart; 6607 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6608 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6609 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6610 } 6611 PetscCall(PetscBTDestroy(&hasdiag)); 6612 PetscCall(PetscFree(minj)); 6613 } 6614 6615 /* Split local COOs and received COOs into diag/offdiag portions */ 6616 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6617 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6618 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6619 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6620 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6621 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6622 6623 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6624 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6625 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6626 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6627 6628 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6629 PetscInt *Ai, *Bi; 6630 PetscInt *Aj, *Bj; 6631 6632 PetscCall(PetscMalloc1(m + 1, &Ai)); 6633 PetscCall(PetscMalloc1(m + 1, &Bi)); 6634 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6635 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6636 6637 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6638 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6639 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6640 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6641 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6642 6643 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6644 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6645 6646 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6647 /* expect nonzeros in A/B most likely have local contributing entries */ 6648 PetscInt Annz = Ai[m]; 6649 PetscInt Bnnz = Bi[m]; 6650 PetscCount *Ajmap1_new, *Bjmap1_new; 6651 6652 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6653 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6654 6655 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6656 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6657 
6658 PetscCall(PetscFree(Aimap1)); 6659 PetscCall(PetscFree(Ajmap1)); 6660 PetscCall(PetscFree(Bimap1)); 6661 PetscCall(PetscFree(Bjmap1)); 6662 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6663 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6664 PetscCall(PetscFree(perm1)); 6665 PetscCall(PetscFree3(i2, j2, perm2)); 6666 6667 Ajmap1 = Ajmap1_new; 6668 Bjmap1 = Bjmap1_new; 6669 6670 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6671 if (Annz < Annz1 + Annz2) { 6672 PetscInt *Aj_new; 6673 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6674 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6675 PetscCall(PetscFree(Aj)); 6676 Aj = Aj_new; 6677 } 6678 6679 if (Bnnz < Bnnz1 + Bnnz2) { 6680 PetscInt *Bj_new; 6681 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6682 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6683 PetscCall(PetscFree(Bj)); 6684 Bj = Bj_new; 6685 } 6686 6687 /* Create new submatrices for on-process and off-process coupling */ 6688 PetscScalar *Aa, *Ba; 6689 MatType rtype; 6690 Mat_SeqAIJ *a, *b; 6691 PetscObjectState state; 6692 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6693 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6694 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6695 if (cstart) { 6696 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6697 } 6698 6699 PetscCall(MatGetRootType_Private(mat, &rtype)); 6700 6701 MatSeqXAIJGetOptions_Private(mpiaij->A); 6702 PetscCall(MatDestroy(&mpiaij->A)); 6703 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6704 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6705 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6706 6707 MatSeqXAIJGetOptions_Private(mpiaij->B); 6708 PetscCall(MatDestroy(&mpiaij->B)); 6709 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6710 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6711 
MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6712 6713 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6714 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6715 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6716 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6717 6718 a = (Mat_SeqAIJ *)mpiaij->A->data; 6719 b = (Mat_SeqAIJ *)mpiaij->B->data; 6720 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6721 a->free_a = b->free_a = PETSC_TRUE; 6722 a->free_ij = b->free_ij = PETSC_TRUE; 6723 6724 /* conversion must happen AFTER multiply setup */ 6725 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6726 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6727 PetscCall(VecDestroy(&mpiaij->lvec)); 6728 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6729 6730 // Put the COO struct in a container and then attach that to the matrix 6731 PetscCall(PetscMalloc1(1, &coo)); 6732 coo->n = coo_n; 6733 coo->sf = sf2; 6734 coo->sendlen = nleaves; 6735 coo->recvlen = nroots; 6736 coo->Annz = Annz; 6737 coo->Bnnz = Bnnz; 6738 coo->Annz2 = Annz2; 6739 coo->Bnnz2 = Bnnz2; 6740 coo->Atot1 = Atot1; 6741 coo->Atot2 = Atot2; 6742 coo->Btot1 = Btot1; 6743 coo->Btot2 = Btot2; 6744 coo->Ajmap1 = Ajmap1; 6745 coo->Aperm1 = Aperm1; 6746 coo->Bjmap1 = Bjmap1; 6747 coo->Bperm1 = Bperm1; 6748 coo->Aimap2 = Aimap2; 6749 coo->Ajmap2 = Ajmap2; 6750 coo->Aperm2 = Aperm2; 6751 coo->Bimap2 = Bimap2; 6752 coo->Bjmap2 = Bjmap2; 6753 coo->Bperm2 = Bperm2; 6754 coo->Cperm1 = Cperm1; 6755 // Allocate in preallocation. 
If not used, it has zero cost on host 6756 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6757 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6758 PetscCall(PetscContainerSetPointer(container, coo)); 6759 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6760 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6761 PetscCall(PetscContainerDestroy(&container)); 6762 PetscFunctionReturn(PETSC_SUCCESS); 6763 } 6764 6765 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6766 { 6767 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6768 Mat A = mpiaij->A, B = mpiaij->B; 6769 PetscScalar *Aa, *Ba; 6770 PetscScalar *sendbuf, *recvbuf; 6771 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6772 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6773 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6774 const PetscCount *Cperm1; 6775 PetscContainer container; 6776 MatCOOStruct_MPIAIJ *coo; 6777 6778 PetscFunctionBegin; 6779 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6780 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6781 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6782 sendbuf = coo->sendbuf; 6783 recvbuf = coo->recvbuf; 6784 Ajmap1 = coo->Ajmap1; 6785 Ajmap2 = coo->Ajmap2; 6786 Aimap2 = coo->Aimap2; 6787 Bjmap1 = coo->Bjmap1; 6788 Bjmap2 = coo->Bjmap2; 6789 Bimap2 = coo->Bimap2; 6790 Aperm1 = coo->Aperm1; 6791 Aperm2 = coo->Aperm2; 6792 Bperm1 = coo->Bperm1; 6793 Bperm2 = coo->Bperm2; 6794 Cperm1 = coo->Cperm1; 6795 6796 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6797 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6798 6799 /* Pack entries to be sent to remote */ 6800 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 
6801 6802 /* Send remote entries to their owner and overlap the communication with local computation */ 6803 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6804 /* Add local entries to A and B */ 6805 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6806 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6807 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6808 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6809 } 6810 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6811 PetscScalar sum = 0.0; 6812 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6813 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6814 } 6815 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6816 6817 /* Add received remote entries to A and B */ 6818 for (PetscCount i = 0; i < coo->Annz2; i++) { 6819 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6820 } 6821 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6822 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6823 } 6824 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6825 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6826 PetscFunctionReturn(PETSC_SUCCESS); 6827 } 6828 6829 /*MC 6830 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6831 6832 Options Database Keys: 6833 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6834 6835 Level: beginner 6836 6837 Notes: 6838 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6839 in this case the values associated with the rows and columns one passes in are set to zero 6840 in the matrix 6841 6842 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6843 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6844 6845 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6846 M*/ 6847 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6848 { 6849 Mat_MPIAIJ *b; 6850 PetscMPIInt size; 6851 6852 PetscFunctionBegin; 6853 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6854 6855 PetscCall(PetscNew(&b)); 6856 B->data = (void *)b; 6857 B->ops[0] = MatOps_Values; 6858 B->assembled = PETSC_FALSE; 6859 B->insertmode = NOT_SET_VALUES; 6860 b->size = size; 6861 6862 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6863 6864 /* build cache for off array entries formed */ 6865 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6866 6867 b->donotstash = PETSC_FALSE; 6868 b->colmap = NULL; 6869 b->garray = NULL; 6870 b->roworiented = PETSC_TRUE; 6871 6872 /* stuff used for matrix vector multiply */ 6873 b->lvec = NULL; 6874 b->Mvctx = NULL; 6875 6876 /* stuff for MatGetRow() */ 6877 b->rowindices = NULL; 6878 b->rowvalues = NULL; 6879 b->getrowactive = PETSC_FALSE; 6880 6881 /* flexible pointer used in CUSPARSE classes */ 6882 b->spptr = NULL; 6883 6884 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6885 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6886 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6894 #if defined(PETSC_HAVE_CUDA) 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6896 #endif 6897 #if defined(PETSC_HAVE_HIP) 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6899 #endif 6900 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6902 #endif 6903 #if defined(PETSC_HAVE_MKL_SPARSE) 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6905 #endif 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", 
MatConvert_MPIAIJ_MPISBAIJ)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6910 #if defined(PETSC_HAVE_ELEMENTAL) 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6912 #endif 6913 #if defined(PETSC_HAVE_SCALAPACK) 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6915 #endif 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6918 #if defined(PETSC_HAVE_HYPRE) 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6921 #endif 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6926 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6927 PetscFunctionReturn(PETSC_SUCCESS); 6928 } 6929 6930 /*@C 6931 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6932 and "off-diagonal" part of the matrix in CSR format. 6933 6934 Collective 6935 6936 Input Parameters: 6937 + comm - MPI communicator 6938 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6939 . 
n - This value should be the same as the local size used in creating the 6940 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6941 calculated if `N` is given) For square matrices `n` is almost always `m`. 6942 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6943 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6944 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6945 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6946 . a - matrix values 6947 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6948 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6949 - oa - matrix values 6950 6951 Output Parameter: 6952 . mat - the matrix 6953 6954 Level: advanced 6955 6956 Notes: 6957 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6958 must free the arrays once the matrix has been destroyed and not before. 6959 6960 The `i` and `j` indices are 0 based 6961 6962 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6963 6964 This sets local rows and cannot be used to set off-processor values. 6965 6966 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6967 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6968 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6969 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6970 keep track of the underlying array. 
Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6971 communication if it is known that only local entries will be set. 6972 6973 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6974 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6975 @*/ 6976 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6977 { 6978 Mat_MPIAIJ *maij; 6979 6980 PetscFunctionBegin; 6981 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6982 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6983 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6984 PetscCall(MatCreate(comm, mat)); 6985 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6986 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6987 maij = (Mat_MPIAIJ *)(*mat)->data; 6988 6989 (*mat)->preallocated = PETSC_TRUE; 6990 6991 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6992 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6993 6994 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6995 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6996 6997 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6998 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6999 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7000 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7001 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7002 PetscFunctionReturn(PETSC_SUCCESS); 7003 } 7004 7005 typedef struct { 7006 Mat *mp; /* intermediate products */ 7007 PetscBool 
*mptmp; /* is the intermediate product temporary ? */ 7008 PetscInt cp; /* number of intermediate products */ 7009 7010 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7011 PetscInt *startsj_s, *startsj_r; 7012 PetscScalar *bufa; 7013 Mat P_oth; 7014 7015 /* may take advantage of merging product->B */ 7016 Mat Bloc; /* B-local by merging diag and off-diag */ 7017 7018 /* cusparse does not have support to split between symbolic and numeric phases. 7019 When api_user is true, we don't need to update the numerical values 7020 of the temporary storage */ 7021 PetscBool reusesym; 7022 7023 /* support for COO values insertion */ 7024 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7025 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7026 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7027 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7028 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7029 PetscMemType mtype; 7030 7031 /* customization */ 7032 PetscBool abmerge; 7033 PetscBool P_oth_bind; 7034 } MatMatMPIAIJBACKEND; 7035 7036 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7037 { 7038 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7039 PetscInt i; 7040 7041 PetscFunctionBegin; 7042 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7043 PetscCall(PetscFree(mmdata->bufa)); 7044 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7045 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7046 PetscCall(MatDestroy(&mmdata->P_oth)); 7047 PetscCall(MatDestroy(&mmdata->Bloc)); 7048 PetscCall(PetscSFDestroy(&mmdata->sf)); 7049 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7050 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7051 PetscCall(PetscFree(mmdata->own[0])); 7052 PetscCall(PetscFree(mmdata->own)); 7053 PetscCall(PetscFree(mmdata->off[0])); 7054 PetscCall(PetscFree(mmdata->off)); 7055 PetscCall(PetscFree(mmdata)); 7056 PetscFunctionReturn(PETSC_SUCCESS); 7057 } 7058 7059 /* Copy selected n entries with indices in idx[] of A to v[]. 
7060 If idx is NULL, copy the whole data array of A to v[] 7061 */ 7062 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7063 { 7064 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7065 7066 PetscFunctionBegin; 7067 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7068 if (f) { 7069 PetscCall((*f)(A, n, idx, v)); 7070 } else { 7071 const PetscScalar *vv; 7072 7073 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7074 if (n && idx) { 7075 PetscScalar *w = v; 7076 const PetscInt *oi = idx; 7077 PetscInt j; 7078 7079 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7080 } else { 7081 PetscCall(PetscArraycpy(v, vv, n)); 7082 } 7083 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7084 } 7085 PetscFunctionReturn(PETSC_SUCCESS); 7086 } 7087 7088 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7089 { 7090 MatMatMPIAIJBACKEND *mmdata; 7091 PetscInt i, n_d, n_o; 7092 7093 PetscFunctionBegin; 7094 MatCheckProduct(C, 1); 7095 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7096 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7097 if (!mmdata->reusesym) { /* update temporary matrices */ 7098 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7099 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7100 } 7101 mmdata->reusesym = PETSC_FALSE; 7102 7103 for (i = 0; i < mmdata->cp; i++) { 7104 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7105 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7106 } 7107 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7108 PetscInt noff = mmdata->off[i + 
1] - mmdata->off[i]; 7109 7110 if (mmdata->mptmp[i]) continue; 7111 if (noff) { 7112 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7113 7114 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7115 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7116 n_o += noff; 7117 n_d += nown; 7118 } else { 7119 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7120 7121 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7122 n_d += mm->nz; 7123 } 7124 } 7125 if (mmdata->hasoffproc) { /* offprocess insertion */ 7126 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7127 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7128 } 7129 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7130 PetscFunctionReturn(PETSC_SUCCESS); 7131 } 7132 7133 /* Support for Pt * A, A * P, or Pt * A * P */ 7134 #define MAX_NUMBER_INTERMEDIATE 4 7135 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7136 { 7137 Mat_Product *product = C->product; 7138 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7139 Mat_MPIAIJ *a, *p; 7140 MatMatMPIAIJBACKEND *mmdata; 7141 ISLocalToGlobalMapping P_oth_l2g = NULL; 7142 IS glob = NULL; 7143 const char *prefix; 7144 char pprefix[256]; 7145 const PetscInt *globidx, *P_oth_idx; 7146 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7147 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7148 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7149 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7150 /* a base offset; type-2: sparse with a local to global map table */ 7151 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7152 7153 MatProductType ptype; 7154 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7155 PetscMPIInt size; 7156 7157 PetscFunctionBegin; 7158 MatCheckProduct(C, 1); 7159 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7160 ptype = product->type; 7161 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7162 ptype = MATPRODUCT_AB; 7163 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7164 } 7165 switch (ptype) { 7166 case MATPRODUCT_AB: 7167 A = product->A; 7168 P = product->B; 7169 m = A->rmap->n; 7170 n = P->cmap->n; 7171 M = A->rmap->N; 7172 N = P->cmap->N; 7173 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7174 break; 7175 case MATPRODUCT_AtB: 7176 P = product->A; 7177 A = product->B; 7178 m = P->cmap->n; 7179 n = A->cmap->n; 7180 M = P->cmap->N; 7181 N = A->cmap->N; 7182 hasoffproc = PETSC_TRUE; 7183 break; 7184 case MATPRODUCT_PtAP: 7185 A = product->A; 7186 P = product->B; 7187 m = P->cmap->n; 7188 n = P->cmap->n; 7189 M = P->cmap->N; 7190 N = P->cmap->N; 7191 hasoffproc = PETSC_TRUE; 7192 break; 7193 default: 7194 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7195 } 7196 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7197 if (size == 1) hasoffproc = PETSC_FALSE; 7198 7199 /* defaults */ 7200 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7201 mp[i] = NULL; 7202 mptmp[i] = PETSC_FALSE; 7203 rmapt[i] = -1; 7204 cmapt[i] = -1; 7205 rmapa[i] = NULL; 7206 cmapa[i] = NULL; 7207 } 7208 7209 /* customization */ 
7210 PetscCall(PetscNew(&mmdata)); 7211 mmdata->reusesym = product->api_user; 7212 if (ptype == MATPRODUCT_AB) { 7213 if (product->api_user) { 7214 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7215 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7216 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7217 PetscOptionsEnd(); 7218 } else { 7219 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7220 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7221 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7222 PetscOptionsEnd(); 7223 } 7224 } else if (ptype == MATPRODUCT_PtAP) { 7225 if (product->api_user) { 7226 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7227 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7228 PetscOptionsEnd(); 7229 } else { 7230 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7231 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7232 PetscOptionsEnd(); 7233 } 7234 } 7235 a = (Mat_MPIAIJ *)A->data; 7236 p = (Mat_MPIAIJ *)P->data; 7237 PetscCall(MatSetSizes(C, m, n, M, N)); 7238 PetscCall(PetscLayoutSetUp(C->rmap)); 7239 PetscCall(PetscLayoutSetUp(C->cmap)); 7240 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7241 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7242 7243 cp = 0; 7244 switch (ptype) { 7245 case MATPRODUCT_AB: /* A * P */ 7246 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7247 7248 /* A_diag * P_local (merged or not) */ 7249 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7250 /* P is product->B */ 7251 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7252 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7253 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7254 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7255 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7256 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7257 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7258 mp[cp]->product->api_user = product->api_user; 7259 PetscCall(MatProductSetFromOptions(mp[cp])); 7260 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7261 PetscCall(ISGetIndices(glob, &globidx)); 7262 rmapt[cp] = 1; 7263 cmapt[cp] = 2; 7264 cmapa[cp] = globidx; 7265 mptmp[cp] = PETSC_FALSE; 7266 cp++; 7267 } else { /* A_diag * P_diag and A_diag * P_off */ 7268 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7269 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7270 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7271 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7272 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7273 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7274 mp[cp]->product->api_user = product->api_user; 7275 PetscCall(MatProductSetFromOptions(mp[cp])); 7276 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7277 rmapt[cp] = 1; 7278 cmapt[cp] = 1; 7279 mptmp[cp] = PETSC_FALSE; 7280 cp++; 7281 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7282 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7283 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7284 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7285 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7286 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7287 mp[cp]->product->api_user = product->api_user; 7288 PetscCall(MatProductSetFromOptions(mp[cp])); 7289 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7290 rmapt[cp] = 1; 7291 cmapt[cp] = 2; 7292 cmapa[cp] = p->garray; 7293 mptmp[cp] = PETSC_FALSE; 7294 cp++; 7295 } 7296 7297 /* A_off * P_other */ 7298 if (mmdata->P_oth) { 7299 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7300 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7301 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7302 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7303 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7304 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7305 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7306 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7307 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7308 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7309 mp[cp]->product->api_user = product->api_user; 7310 PetscCall(MatProductSetFromOptions(mp[cp])); 7311 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7312 rmapt[cp] = 1; 7313 cmapt[cp] = 2; 7314 cmapa[cp] = P_oth_idx; 7315 mptmp[cp] = PETSC_FALSE; 7316 cp++; 7317 } 7318 break; 7319 7320 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7321 /* A is product->B */ 7322 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7323 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7324 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7325 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7326 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7327 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7328 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7329 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7330 mp[cp]->product->api_user = product->api_user; 7331 PetscCall(MatProductSetFromOptions(mp[cp])); 7332 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7333 PetscCall(ISGetIndices(glob, &globidx)); 7334 rmapt[cp] = 2; 7335 rmapa[cp] = globidx; 7336 cmapt[cp] = 2; 7337 cmapa[cp] = globidx; 7338 mptmp[cp] = PETSC_FALSE; 7339 cp++; 7340 } else { 7341 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7342 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7343 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7344 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7345 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7346 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7347 mp[cp]->product->api_user = product->api_user; 7348 PetscCall(MatProductSetFromOptions(mp[cp])); 7349 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7350 PetscCall(ISGetIndices(glob, &globidx)); 7351 rmapt[cp] = 1; 7352 cmapt[cp] = 2; 7353 cmapa[cp] = globidx; 7354 mptmp[cp] = PETSC_FALSE; 7355 cp++; 7356 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7357 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7358 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7359 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7360 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7361 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7362 mp[cp]->product->api_user = product->api_user; 7363 PetscCall(MatProductSetFromOptions(mp[cp])); 7364 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7365 rmapt[cp] = 2; 7366 rmapa[cp] = p->garray; 7367 cmapt[cp] = 
2; 7368 cmapa[cp] = globidx; 7369 mptmp[cp] = PETSC_FALSE; 7370 cp++; 7371 } 7372 break; 7373 case MATPRODUCT_PtAP: 7374 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7375 /* P is product->B */ 7376 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7377 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7378 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7379 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7380 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7381 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7382 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7383 mp[cp]->product->api_user = product->api_user; 7384 PetscCall(MatProductSetFromOptions(mp[cp])); 7385 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7386 PetscCall(ISGetIndices(glob, &globidx)); 7387 rmapt[cp] = 2; 7388 rmapa[cp] = globidx; 7389 cmapt[cp] = 2; 7390 cmapa[cp] = globidx; 7391 mptmp[cp] = PETSC_FALSE; 7392 cp++; 7393 if (mmdata->P_oth) { 7394 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7395 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7396 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7397 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7398 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7399 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7400 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7401 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7402 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7403 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7404 mp[cp]->product->api_user = product->api_user; 7405 PetscCall(MatProductSetFromOptions(mp[cp])); 7406 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7407 mptmp[cp] = PETSC_TRUE; 
7408 cp++; 7409 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7410 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7411 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7412 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7413 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7414 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7415 mp[cp]->product->api_user = product->api_user; 7416 PetscCall(MatProductSetFromOptions(mp[cp])); 7417 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7418 rmapt[cp] = 2; 7419 rmapa[cp] = globidx; 7420 cmapt[cp] = 2; 7421 cmapa[cp] = P_oth_idx; 7422 mptmp[cp] = PETSC_FALSE; 7423 cp++; 7424 } 7425 break; 7426 default: 7427 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7428 } 7429 /* sanity check */ 7430 if (size > 1) 7431 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7432 7433 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7434 for (i = 0; i < cp; i++) { 7435 mmdata->mp[i] = mp[i]; 7436 mmdata->mptmp[i] = mptmp[i]; 7437 } 7438 mmdata->cp = cp; 7439 C->product->data = mmdata; 7440 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7441 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7442 7443 /* memory type */ 7444 mmdata->mtype = PETSC_MEMTYPE_HOST; 7445 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7446 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7447 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7448 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7449 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7450 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7451 7452 /* prepare coo 
coordinates for values insertion */ 7453 7454 /* count total nonzeros of those intermediate seqaij Mats 7455 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7456 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7457 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7458 */ 7459 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7460 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7461 if (mptmp[cp]) continue; 7462 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7463 const PetscInt *rmap = rmapa[cp]; 7464 const PetscInt mr = mp[cp]->rmap->n; 7465 const PetscInt rs = C->rmap->rstart; 7466 const PetscInt re = C->rmap->rend; 7467 const PetscInt *ii = mm->i; 7468 for (i = 0; i < mr; i++) { 7469 const PetscInt gr = rmap[i]; 7470 const PetscInt nz = ii[i + 1] - ii[i]; 7471 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7472 else ncoo_oown += nz; /* this row is local */ 7473 } 7474 } else ncoo_d += mm->nz; 7475 } 7476 7477 /* 7478 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7479 7480 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7481 7482 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7483 7484 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7485 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7486 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7487 7488 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7489 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7490 */ 7491 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7492 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7493 7494 /* gather (i,j) of nonzeros inserted by remote procs */ 7495 if (hasoffproc) { 7496 PetscSF msf; 7497 PetscInt ncoo2, *coo_i2, *coo_j2; 7498 7499 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7500 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7501 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7502 7503 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7504 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7505 PetscInt *idxoff = mmdata->off[cp]; 7506 PetscInt *idxown = mmdata->own[cp]; 7507 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7508 const PetscInt *rmap = rmapa[cp]; 7509 const PetscInt *cmap = cmapa[cp]; 7510 const PetscInt *ii = mm->i; 7511 PetscInt *coi = coo_i + ncoo_o; 7512 PetscInt *coj = coo_j + ncoo_o; 7513 const PetscInt mr = mp[cp]->rmap->n; 7514 const PetscInt rs = C->rmap->rstart; 7515 const PetscInt re = C->rmap->rend; 7516 const PetscInt cs = C->cmap->rstart; 7517 for (i = 0; i < mr; i++) { 7518 const PetscInt *jj = mm->j + ii[i]; 7519 const PetscInt gr = rmap[i]; 7520 const PetscInt nz = ii[i + 1] - ii[i]; 7521 if (gr < rs || gr >= re) { /* this is an offproc row */ 7522 for (j = ii[i]; j < ii[i + 1]; j++) { 7523 *coi++ = gr; 7524 *idxoff++ = j; 7525 } 7526 if (!cmapt[cp]) { /* already global */ 7527 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7528 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7529 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7530 } else { /* offdiag */ 7531 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7532 } 7533 ncoo_o += nz; 7534 } else { /* this is a local row */ 7535 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7536 } 7537 } 7538 } 7539 mmdata->off[cp + 1] = idxoff; 7540 mmdata->own[cp + 1] = idxown; 7541 } 7542 7543 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7544 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7545 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7546 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7547 ncoo = ncoo_d + ncoo_oown + ncoo2; 7548 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7549 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7550 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7551 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7552 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7553 PetscCall(PetscFree2(coo_i, coo_j)); 7554 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7555 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7556 coo_i = coo_i2; 7557 coo_j = coo_j2; 7558 } else { /* no offproc values insertion */ 7559 ncoo = ncoo_d; 7560 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7561 7562 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7563 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7564 PetscCall(PetscSFSetUp(mmdata->sf)); 7565 } 7566 mmdata->hasoffproc = hasoffproc; 7567 7568 /* gather (i,j) of nonzeros inserted locally */ 7569 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7570 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7571 PetscInt *coi = coo_i + ncoo_d; 7572 PetscInt *coj = coo_j + ncoo_d; 7573 const PetscInt *jj = mm->j; 7574 const PetscInt *ii = mm->i; 7575 const PetscInt *cmap = 
cmapa[cp]; 7576 const PetscInt *rmap = rmapa[cp]; 7577 const PetscInt mr = mp[cp]->rmap->n; 7578 const PetscInt rs = C->rmap->rstart; 7579 const PetscInt re = C->rmap->rend; 7580 const PetscInt cs = C->cmap->rstart; 7581 7582 if (mptmp[cp]) continue; 7583 if (rmapt[cp] == 1) { /* consecutive rows */ 7584 /* fill coo_i */ 7585 for (i = 0; i < mr; i++) { 7586 const PetscInt gr = i + rs; 7587 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7588 } 7589 /* fill coo_j */ 7590 if (!cmapt[cp]) { /* type-0, already global */ 7591 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7592 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7593 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7594 } else { /* type-2, local to global for sparse columns */ 7595 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7596 } 7597 ncoo_d += mm->nz; 7598 } else if (rmapt[cp] == 2) { /* sparse rows */ 7599 for (i = 0; i < mr; i++) { 7600 const PetscInt *jj = mm->j + ii[i]; 7601 const PetscInt gr = rmap[i]; 7602 const PetscInt nz = ii[i + 1] - ii[i]; 7603 if (gr >= rs && gr < re) { /* local rows */ 7604 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7605 if (!cmapt[cp]) { /* type-0, already global */ 7606 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7607 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7608 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7609 } else { /* type-2, local to global for sparse columns */ 7610 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7611 } 7612 ncoo_d += nz; 7613 } 7614 } 7615 } 7616 } 7617 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7618 PetscCall(ISDestroy(&glob)); 7619 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7620 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7621 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7622 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7623 7624 /* preallocate with COO data */ 7625 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7626 PetscCall(PetscFree2(coo_i, coo_j)); 7627 PetscFunctionReturn(PETSC_SUCCESS); 7628 } 7629 7630 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7631 { 7632 Mat_Product *product = mat->product; 7633 #if defined(PETSC_HAVE_DEVICE) 7634 PetscBool match = PETSC_FALSE; 7635 PetscBool usecpu = PETSC_FALSE; 7636 #else 7637 PetscBool match = PETSC_TRUE; 7638 #endif 7639 7640 PetscFunctionBegin; 7641 MatCheckProduct(mat, 1); 7642 #if defined(PETSC_HAVE_DEVICE) 7643 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7644 if (match) { /* we can always fallback to the CPU if requested */ 7645 switch (product->type) { 7646 case MATPRODUCT_AB: 7647 if (product->api_user) { 7648 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7649 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7650 PetscOptionsEnd(); 7651 } else { 7652 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7653 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7654 PetscOptionsEnd(); 7655 } 7656 break; 7657 case MATPRODUCT_AtB: 7658 if (product->api_user) { 7659 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7660 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7661 PetscOptionsEnd(); 7662 } else { 7663 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7664 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7665 PetscOptionsEnd(); 7666 } 7667 break; 7668 case MATPRODUCT_PtAP: 7669 if (product->api_user) { 7670 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7671 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7672 PetscOptionsEnd(); 7673 } else { 7674 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7675 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7676 PetscOptionsEnd(); 7677 } 7678 break; 7679 default: 7680 break; 7681 } 7682 match = (PetscBool)!usecpu; 7683 } 7684 #endif 7685 if (match) { 7686 switch (product->type) { 7687 case MATPRODUCT_AB: 7688 case MATPRODUCT_AtB: 7689 case MATPRODUCT_PtAP: 7690 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7691 break; 7692 default: 7693 break; 7694 } 7695 } 7696 /* fallback to MPIAIJ ops */ 7697 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7698 PetscFunctionReturn(PETSC_SUCCESS); 7699 } 7700 7701 /* 7702 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7703 7704 n - the number of block indices in cc[] 7705 cc - the block indices (must be large enough to contain the indices) 7706 */ 7707 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7708 { 7709 PetscInt cnt = -1, nidx, j; 7710 const PetscInt *idx; 7711 7712 PetscFunctionBegin; 7713 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7714 if (nidx) { 7715 cnt = 0; 7716 cc[cnt] = idx[0] / bs; 7717 for (j = 1; j < nidx; j++) { 7718 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7719 } 7720 } 7721 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7722 *n = cnt + 1; 7723 PetscFunctionReturn(PETSC_SUCCESS); 7724 } 7725 7726 /* 7727 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7728 7729 ncollapsed - the number of block indices 7730 collapsed - the block indices (must be large enough to contain the indices) 7731 */ 7732 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7733 { 7734 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7735 7736 PetscFunctionBegin; 7737 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7738 for (i = start + 1; i < start + bs; i++) { 7739 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7740 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7741 cprevtmp = cprev; 7742 cprev = merged; 7743 merged = cprevtmp; 7744 } 7745 *ncollapsed = nprev; 7746 if (collapsed) *collapsed = cprev; 7747 PetscFunctionReturn(PETSC_SUCCESS); 7748 } 7749 7750 /* 7751 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7752 7753 Input Parameter: 7754 . Amat - matrix 7755 - symmetrize - make the result symmetric 7756 + scale - scale with diagonal 7757 7758 Output Parameter: 7759 . 
a_Gmat - output scalar graph >= 0 7760 7761 */ 7762 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7763 { 7764 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7765 MPI_Comm comm; 7766 Mat Gmat; 7767 PetscBool ismpiaij, isseqaij; 7768 Mat a, b, c; 7769 MatType jtype; 7770 7771 PetscFunctionBegin; 7772 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7773 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7774 PetscCall(MatGetSize(Amat, &MM, &NN)); 7775 PetscCall(MatGetBlockSize(Amat, &bs)); 7776 nloc = (Iend - Istart) / bs; 7777 7778 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7779 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7780 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7781 7782 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7783 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7784 implementation */ 7785 if (bs > 1) { 7786 PetscCall(MatGetType(Amat, &jtype)); 7787 PetscCall(MatCreate(comm, &Gmat)); 7788 PetscCall(MatSetType(Gmat, jtype)); 7789 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7790 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7791 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7792 PetscInt *d_nnz, *o_nnz; 7793 MatScalar *aa, val, *AA; 7794 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7795 if (isseqaij) { 7796 a = Amat; 7797 b = NULL; 7798 } else { 7799 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7800 a = d->A; 7801 b = d->B; 7802 } 7803 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7804 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7805 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7806 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7807 const PetscInt *cols1, *cols2; 7808 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7809 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7810 nnz[brow / bs] = nc2 / bs; 7811 if (nc2 % bs) ok = 0; 7812 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7813 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7814 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7815 if (nc1 != nc2) ok = 0; 7816 else { 7817 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7818 if (cols1[jj] != cols2[jj]) ok = 0; 7819 if (cols1[jj] % bs != jj % bs) ok = 0; 7820 } 7821 } 7822 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7823 } 7824 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7825 if (!ok) { 7826 PetscCall(PetscFree2(d_nnz, o_nnz)); 7827 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7828 goto old_bs; 7829 } 7830 } 7831 } 7832 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7833 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7834 PetscCall(PetscFree2(d_nnz, o_nnz)); 7835 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7836 // diag 7837 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7838 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7839 ai = aseq->i; 7840 n = ai[brow + 1] - ai[brow]; 7841 aj = aseq->j + ai[brow]; 7842 for (int k = 0; k < n; k += bs) { // block columns 7843 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7844 val = 0; 7845 if (index_size == 0) { 7846 for (int ii = 0; ii < bs; ii++) { // rows in block 7847 aa = aseq->a + ai[brow + ii] + k; 7848 for (int jj = 0; jj < bs; jj++) { // columns in block 7849 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7850 } 7851 } 7852 } else { // use (index,index) value if provided 7853 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7854 int ii = index[iii]; 7855 aa = aseq->a + ai[brow + ii] + k; 7856 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7857 int jj = index[jjj]; 7858 val += PetscAbs(PetscRealPart(aa[jj])); 7859 } 7860 } 7861 } 7862 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7863 AA[k / bs] = val; 7864 } 7865 grow = Istart / bs + brow / bs; 7866 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7867 } 7868 // off-diag 7869 if (ismpiaij) { 7870 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7871 const PetscScalar *vals; 7872 const PetscInt *cols, *garray = aij->garray; 7873 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7874 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7875 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7876 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7877 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7878 AA[k / bs] = 0; 7879 AJ[cidx] = garray[cols[k]] / bs; 7880 } 7881 nc = ncols / bs; 7882 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7883 if (index_size == 0) { 7884 for (int ii = 0; ii < bs; ii++) { // rows in block 7885 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7886 for (int k = 0; k < ncols; k += bs) { 7887 for (int jj = 0; jj < bs; jj++) { // cols in block 7888 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7889 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7890 } 7891 } 7892 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7893 } 7894 } else { // use (index,index) value if provided 7895 for (int iii = 0; iii < index_size; iii++) { // rows in block 7896 int ii = index[iii]; 7897 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7898 for (int k = 0; k < ncols; k += bs) { 7899 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7900 int jj = index[jjj]; 7901 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7902 } 7903 } 7904 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7905 } 7906 } 7907 grow = Istart / bs + brow / bs; 7908 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7909 } 7910 } 7911 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7912 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7913 PetscCall(PetscFree2(AA, AJ)); 7914 } else { 7915 const PetscScalar *vals; 7916 const PetscInt *idx; 7917 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7918 old_bs: 7919 /* 7920 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7921 */ 7922 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7923 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7924 if (isseqaij) { 7925 PetscInt max_d_nnz; 7926 /* 7927 Determine exact preallocation count for (sequential) scalar matrix 7928 */ 7929 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7930 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7931 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7932 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7933 PetscCall(PetscFree3(w0, w1, w2)); 7934 } else if (ismpiaij) { 7935 Mat Daij, Oaij; 7936 const PetscInt *garray; 7937 PetscInt max_d_nnz; 7938 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7939 /* 7940 Determine exact preallocation count for diagonal block portion of scalar matrix 7941 */ 7942 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7943 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7944 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7945 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7946 PetscCall(PetscFree3(w0, w1, w2)); 7947 /* 7948 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7949 */ 7950 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7951 o_nnz[jj] = 0; 7952 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7953 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7954 o_nnz[jj] += ncols; 7955 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7956 } 7957 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7958 } 7959 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7960 /* get scalar copy (norms) of matrix */ 7961 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7962 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7963 PetscCall(PetscFree2(d_nnz, o_nnz)); 7964 for (Ii = Istart; Ii < Iend; Ii++) { 7965 PetscInt dest_row = Ii / bs; 7966 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7967 for (jj = 0; jj < ncols; jj++) { 7968 PetscInt dest_col = idx[jj] / bs; 7969 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7970 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7971 } 7972 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7973 } 7974 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7975 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7976 } 7977 } else { 7978 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7979 else { 7980 Gmat = Amat; 7981 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7982 } 7983 if (isseqaij) { 7984 a = Gmat; 7985 b = NULL; 7986 } else { 7987 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7988 a = d->A; 7989 b = d->B; 7990 } 7991 if (filter >= 0 || scale) { 7992 /* take absolute value of each entry */ 7993 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7994 MatInfo info; 7995 PetscScalar *avals; 7996 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7997 PetscCall(MatSeqAIJGetArray(c, &avals)); 7998 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 7999 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8000 } 8001 } 8002 } 8003 if (symmetrize) { 8004 PetscBool isset, issym; 8005 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8006 if (!isset || !issym) { 8007 Mat matTrans; 8008 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8009 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8010 PetscCall(MatDestroy(&matTrans)); 8011 } 8012 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8013 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8014 if (scale) { 8015 /* scale c for all diagonal values = 1 or -1 */ 8016 Vec diag; 8017 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8018 PetscCall(MatGetDiagonal(Gmat, diag)); 8019 PetscCall(VecReciprocal(diag)); 8020 PetscCall(VecSqrtAbs(diag)); 8021 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8022 PetscCall(VecDestroy(&diag)); 8023 } 8024 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8025 8026 if (filter >= 0) { 8027 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8028 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8029 } 8030 *a_Gmat = Gmat; 8031 PetscFunctionReturn(PETSC_SUCCESS); 8032 } 8033 8034 /* 8035 Special version for direct calls from Fortran 8036 */ 8037 #include <petsc/private/fortranimpl.h> 8038 8039 /* Change these macros so can be used in void function */ 8040 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8041 #undef PetscCall 8042 #define PetscCall(...) \ 8043 do { \ 8044 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8045 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8046 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8047 return; \ 8048 } \ 8049 } while (0) 8050 8051 #undef SETERRQ 8052 #define SETERRQ(comm, ierr, ...) 
\ 8053 do { \ 8054 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8055 return; \ 8056 } while (0) 8057 8058 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8059 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8060 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8061 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8062 #else 8063 #endif 8064 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8065 { 8066 Mat mat = *mmat; 8067 PetscInt m = *mm, n = *mn; 8068 InsertMode addv = *maddv; 8069 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8070 PetscScalar value; 8071 8072 MatCheckPreallocated(mat, 1); 8073 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8074 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8075 { 8076 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8077 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8078 PetscBool roworiented = aij->roworiented; 8079 8080 /* Some Variables required in the macro */ 8081 Mat A = aij->A; 8082 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8083 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8084 MatScalar *aa; 8085 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8086 Mat B = aij->B; 8087 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8088 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8089 MatScalar *ba; 8090 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8091 * cannot use "#if defined" inside a macro. 
*/ 8092 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8093 8094 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8095 PetscInt nonew = a->nonew; 8096 MatScalar *ap1, *ap2; 8097 8098 PetscFunctionBegin; 8099 PetscCall(MatSeqAIJGetArray(A, &aa)); 8100 PetscCall(MatSeqAIJGetArray(B, &ba)); 8101 for (i = 0; i < m; i++) { 8102 if (im[i] < 0) continue; 8103 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8104 if (im[i] >= rstart && im[i] < rend) { 8105 row = im[i] - rstart; 8106 lastcol1 = -1; 8107 rp1 = aj + ai[row]; 8108 ap1 = aa + ai[row]; 8109 rmax1 = aimax[row]; 8110 nrow1 = ailen[row]; 8111 low1 = 0; 8112 high1 = nrow1; 8113 lastcol2 = -1; 8114 rp2 = bj + bi[row]; 8115 ap2 = ba + bi[row]; 8116 rmax2 = bimax[row]; 8117 nrow2 = bilen[row]; 8118 low2 = 0; 8119 high2 = nrow2; 8120 8121 for (j = 0; j < n; j++) { 8122 if (roworiented) value = v[i * n + j]; 8123 else value = v[i + j * m]; 8124 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8125 if (in[j] >= cstart && in[j] < cend) { 8126 col = in[j] - cstart; 8127 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8128 } else if (in[j] < 0) continue; 8129 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8130 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8131 } else { 8132 if (mat->was_assembled) { 8133 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8134 #if defined(PETSC_USE_CTABLE) 8135 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8136 col--; 8137 #else 8138 col = aij->colmap[in[j]] - 1; 8139 #endif 8140 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8141 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8142 col = in[j]; 8143 /* Reinitialize the 
variables required by MatSetValues_SeqAIJ_B_Private() */ 8144 B = aij->B; 8145 b = (Mat_SeqAIJ *)B->data; 8146 bimax = b->imax; 8147 bi = b->i; 8148 bilen = b->ilen; 8149 bj = b->j; 8150 rp2 = bj + bi[row]; 8151 ap2 = ba + bi[row]; 8152 rmax2 = bimax[row]; 8153 nrow2 = bilen[row]; 8154 low2 = 0; 8155 high2 = nrow2; 8156 bm = aij->B->rmap->n; 8157 ba = b->a; 8158 inserted = PETSC_FALSE; 8159 } 8160 } else col = in[j]; 8161 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8162 } 8163 } 8164 } else if (!aij->donotstash) { 8165 if (roworiented) { 8166 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8167 } else { 8168 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8169 } 8170 } 8171 } 8172 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8173 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8174 } 8175 PetscFunctionReturnVoid(); 8176 } 8177 8178 /* Undefining these here since they were redefined from their original definition above! No 8179 * other PETSc functions should be defined past this point, as it is impossible to recover the 8180 * original definitions */ 8181 #undef PetscCall 8182 #undef SETERRQ 8183